version 0.17 - use the size information in release_metadata.xml, treat checksum errors as failures
authorWilliam Roberts <williamr@symbian.org>
Tue, 29 Jun 2010 17:58:44 +0100 (2010-06-29)
changeset 300 3b8bce67b574
parent 299 4a2d7d29da0f
child 301 1b8ed3598305
version 0.17 - use the size information in release_metadata.xml, treat checksum errors as failures Give a clear statement of success or failure after all the unzipping (if any) is completed This version will also resume any interrupted transfers (unless you say --noresume) based on the file sizes.
downloadkit/downloadkit.py
--- a/downloadkit/downloadkit.py	Tue Jun 29 18:09:01 2010 +0100
+++ b/downloadkit/downloadkit.py	Tue Jun 29 17:58:44 2010 +0100
@@ -26,12 +26,13 @@
 import hashlib
 import xml.etree.ElementTree as ET 
 
-version = '0.16'
+version = '0.17'
 user_agent = 'downloadkit.py script v' + version
 headers = { 'User-Agent' : user_agent }
 top_level_url = "https://developer.symbian.org"
 passman = urllib2.HTTPPasswordMgrWithDefaultRealm()	# not relevant for live Symbian website
 download_list = []
+failure_list = []
 unzip_list = []
 
 def build_opener(debug=False):
@@ -253,6 +254,7 @@
 	return md5.hexdigest().upper()
 
 checksums = {}
+filesizes = {}
 def parse_release_metadata(filename):
 	if os.path.exists(filename):
 		tree = ET.parse(filename)
@@ -262,18 +264,31 @@
 				file = element.get("name")
 				md5 = element.get("md5checksum")
 				checksums[file] = md5.upper()
+				size = element.get("size")
+				filesizes[file] = int(size)
 
 def download_file(filename,url):
 	global options
 	global checksums
+	global filesizes
+	resume_start = 0
 	if os.path.exists(filename):
 		if filename in checksums:
 			print 'Checking existing ' + filename
-			file_checksum = md5_checksum(filename)
-			if file_checksum == checksums[filename]:
+			file_size = os.stat(filename).st_size
+			if file_size == filesizes[filename]:
+				file_checksum = md5_checksum(filename)
+				if file_checksum == checksums[filename]:
+					if options.progress:
+						print '- OK ' + filename
+					return True
+			elif file_size < filesizes[filename]:
 				if options.progress:
-					print '- OK ' + filename
-				return True
+					print '- %s is too short' % (filename)
+				if options.debug:
+					print '- %s is %d bytes, should be %d bytes' % (filename, file_size, filesizes[filename])
+				if options.resume:
+					resume_start = file_size
 
 	if options.dryrun and not re.match(r"release_metadata", filename):
 		global download_list
@@ -283,7 +298,10 @@
 
 	print 'Downloading ' + filename
 	global headers
-	req = urllib2.Request(url, None, headers)
+	request_headers = headers.copy()		# want a fresh copy for each request
+	if resume_start > 0:
+		request_headers['Range'] = "bytes=%d-%d" % (resume_start, filesizes[filename])
+	req = urllib2.Request(url, None, request_headers)
 	
 	CHUNK = 128 * 1024
 	size = 0
@@ -297,7 +315,7 @@
 		if chunk.find('<div id="sign_in_box">') != -1:
 			# our urllib2 cookies have gone awol - login again
 			login(False)
-			req = urllib2.Request(url, None, headers)
+			req = urllib2.Request(url, None, request_headers)
 			response = urllib2.urlopen(req)
 			chunk = response.read(CHUNK)
 			if chunk.find('<div id="sign_in_box">') != -1:
@@ -306,7 +324,16 @@
 				return False
 		info = response.info()
 		if 'Content-Length' in info:
-			filesize = int(info['Content-Length'])
+			filesize = resume_start + int(info['Content-Length'])		# NB. length of the requested content, taking into account the range
+			if resume_start > 0 and 'Content-Range' not in info:
+				# server doesn't believe in our range
+				filesize = int(info['Content-Length'])
+				if options.debug:
+					print "Server reports filesize as %d, ignoring our range request (%d-%d)" % (filesize, resume_start, filesizes[filename])
+				resume_start = 0;	# will have to download from scratch
+			if filename in filesizes:
+				if filesize != filesizes[filename]:
+					print "WARNING:  %s size %d does not match release_metadata.xml (%d)" % ( filename, filesize, filesizes[filename])
 		else:
 			match = re.search('>([^>]+Licen[^<]+)<', chunk, re.IGNORECASE)
 			if match:
@@ -328,9 +355,18 @@
 		return False
 
 	# we are now up and running, and chunk contains the start of the download
-	
+	if options.debug:
+		print "\nReading %s from effective URL %s" % (filename, response.geturl())
+
 	try:
-		fp = open(filename, 'wb')
+		if resume_start > 0:
+			fp = open(filename, 'a+b')	# append to existing content
+			if options.progress:
+				print " - Resuming at offset %d" % (resume_start)
+				size = resume_start
+				last_size = size
+		else:
+			fp = open(filename, 'wb')		# write new file
 		md5 = hashlib.md5()
 		while True:
 			fp.write(chunk)
@@ -354,9 +390,6 @@
 			if not chunk: break
 
 		fp.close()
-		if options.progress:
-			now = time.time()
-			print "- Completed %s - %d Kb in %d seconds" % (filename, (filesize/1024)+0.5, now-start_time)
 
 	#handle errors
 	except urllib2.URLError, e:
@@ -367,16 +400,36 @@
 			print 'Error code: ', e.code
 		return False
 
+	if options.debug:
+		info = response.info()
+		print "Info from final response of transfer:"
+		print response.info()
+
+	if filesize > 0 and size != filesize:
+		print "Incomplete transfer - only received %d bytes of the expected %d byte file" % (size, filesize)
+		return False
+	
+	if options.progress:
+		now = time.time()
+		print "- Completed %s - %d Kb in %d seconds" % (filename, (filesize/1024)+0.5, now-start_time)
+
 	if filename in checksums:
 		download_checksum = md5.hexdigest().upper()
+		if resume_start > 0:
+			# did a partial download, so need to checksum the whole file
+			download_checksum = md5_checksum(filename)
 		if download_checksum != checksums[filename]:
-			print '- WARNING: %s checksum does not match' % filename
+			if options.debug:
+				print '- Checksum for %s was %s, expected %s' % (filename, download_checksum, checksums[filename])
+			print '- ERROR: %s checksum does not match' % filename
+			return False
 
 	return True
 
 def downloadkit(version):	
 	global headers
 	global options
+	global failure_list
 	urlbase = top_level_url + '/main/tools_and_kits/downloads/'
 
 	viewid = 5   # default to Symbian^3
@@ -439,6 +492,7 @@
 		if options.noarmv5 and options.nowinscw and re.search(r"binaries_epoc.zip|binaries_epoc_sdk", filename) :
 			continue 	# skip binaries_epoc and binaries_sdk ...
 		if download_file(filename, downloadurl) != True :
+			failure_list.append(filename)
 			continue # download failed
 
 		# unzip the file (if desired)
@@ -457,6 +511,13 @@
 	# wait for the unzipping threads to complete
 	complete_outstanding_unzips()  
 
+	if len(failure_list) > 0:
+		print "\n"
+		print "Downloading completed, with failures in %d files\n" % (len(failure_list))
+		print "\n\t".join(failure_list)
+		print "\n"
+	elif not options.dryrun:
+		print "\nDownloading completed successfully"
 	return 1
 
 parser = OptionParser(version="%prog "+version, usage="Usage: %prog [options] version")
@@ -482,6 +543,8 @@
 	help="debug HTML traffic (not recommended!)")
 parser.add_option("--webhost", dest="webhost", metavar="SITE",
 	help="use alternative website (for testing!)")
+parser.add_option("--noresume", action="store_false", dest="resume",
+	help="Do not attempt to continue a previous failed transfer")
 parser.set_defaults(
 	dryrun=False, 
 	nosrc=False, 
@@ -493,6 +556,7 @@
 	username='',
 	password='',
 	webhost = 'developer.symbian.org',
+	resume=True,
 	debug=False
 	)