diff -r 753418a2eb15 -r 0df3a90af030 downloadkit/downloadkit.py
--- a/downloadkit/downloadkit.py	Tue Feb 09 12:50:02 2010 +0000
+++ b/downloadkit/downloadkit.py	Tue Feb 09 17:40:51 2010 +0000
@@ -22,6 +22,8 @@
 import time
 from BeautifulSoup import BeautifulSoup
 from optparse import OptionParser
+import hashlib
+import xml.etree.ElementTree as ET
 
 user_agent = 'downloadkit.py script'
 headers = { 'User-Agent' : user_agent }
@@ -162,8 +164,11 @@
 
 def orderResults(x,y) :
 	def ranking(name) :
+		# 0th = release_metadata
+		if re.match(r"release_metadata", name):
+			return 0000;
 		# 1st = release_metadata, build_BOM.zip (both small things!)
-		if re.match(r"(build_BOM|release_metadata)", name):
+		if re.match(r"build_BOM", name):
 			return 1000;
 		# 2nd = tools, binaries (required for execution and compilation)
 		elif re.match(r"(binaries_|tools_)", name):
@@ -183,48 +188,97 @@
 	ytitle = y['title']
 	return cmp(ranking(xtitle)+cmp(xtitle,ytitle), ranking(ytitle))
 
+def md5_checksum(filename):
+	MD5_BLOCK_SIZE = 128 * 1024
+	md5 = hashlib.md5()
+	try:
+		file = open(filename,"rb")
+	except IOError:
+		print "Terminating script: Unable to open %S" % filename
+		sys.exit()
+	while True:
+		data = file.read(MD5_BLOCK_SIZE)
+		if not data:
+			break
+		md5.update(data)
+	file.close()
+	return md5.hexdigest().upper()
+
+checksums = {}
+def parse_release_metadata(filename):
+	if os.path.exists(filename):
+		tree = ET.parse(filename)
+		iter = tree.getiterator('package')
+		for element in iter:
+			if element.keys():
+				file = element.get("name")
+				md5 = element.get("md5checksum")
+				checksums[file] = md5.upper()
+
 def download_file(filename,url):
 	global options
-	if options.dryrun :
+	global checksums
+	if os.path.exists(filename):
+		if filename in checksums:
+			print 'Checking existing ' + filename
+			file_checksum = md5_checksum(filename)
+			if file_checksum == checksums[filename]:
+				if options.progress:
+					print '- OK ' + filename
+				return True
+
+	if options.dryrun and not re.match(r"release_metadata", filename):
 		global download_list
 		download_info = "download %s %s" % (filename, url)
 		download_list.append(download_info)
 		return True
-		
+
 	print 'Downloading ' + filename
 	global headers
 	req = urllib2.Request(url, None, headers)
+	CHUNK = 128 * 1024
+	size = 0
+	filesize = -1
+	start_time = time.time()
+	last_time = start_time
+	last_size = size
 	try:
 		response = urllib2.urlopen(req)
-		CHUNK = 128 * 1024
-		size = 0
-		filesize = -1
-		last_time = time.time()
-		last_size = size
-		fp = open(filename, 'wb')
-		while True:
+		chunk = response.read(CHUNK)
+		if chunk.find('
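
For reference, the verify-before-download flow this patch introduces can be sketched on its own as follows. This is a minimal standalone illustration, not the script's actual code: the helper names mirror the patch, already_downloaded is a hypothetical convenience wrapper, and it assumes a release_metadata.xml whose <package> elements carry name and md5checksum attributes, as the parsing above expects.

import hashlib
import os
import xml.etree.ElementTree as ET

MD5_BLOCK_SIZE = 128 * 1024

def md5_checksum(filename):
	# Hash the file in 128 KB blocks so large packages need not fit in memory.
	md5 = hashlib.md5()
	with open(filename, "rb") as f:
		while True:
			data = f.read(MD5_BLOCK_SIZE)
			if not data:
				break
			md5.update(data)
	return md5.hexdigest().upper()

def parse_release_metadata(filename):
	# Map package file names to their published MD5 checksums.
	checksums = {}
	if os.path.exists(filename):
		tree = ET.parse(filename)
		for element in tree.iter('package'):
			name = element.get("name")
			md5 = element.get("md5checksum")
			if name and md5:
				checksums[name] = md5.upper()
	return checksums

def already_downloaded(filename, checksums):
	# A file is skipped only when it exists locally and matches its published checksum.
	return (os.path.exists(filename)
		and filename in checksums
		and md5_checksum(filename) == checksums[filename])

With helpers along these lines, the new guard at the top of download_file amounts to: if the file is already present and its checksum matches the metadata, report it as OK and return without re-downloading. This is also why the ranking change promotes release_metadata to 0th place: the metadata file must be fetched and parsed before any other package can be checked.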