downloadkit/downloadkit.py
changeset 157 0df3a90af030
parent 155 b6c06a8333fb
child 161 fed3f1d2c557
equal deleted inserted replaced
156:753418a2eb15 157:0df3a90af030
    20 import getpass
    20 import getpass
    21 import re
    21 import re
    22 import time
    22 import time
    23 from BeautifulSoup import BeautifulSoup
    23 from BeautifulSoup import BeautifulSoup
    24 from optparse import OptionParser
    24 from optparse import OptionParser
       
    25 import hashlib
       
    26 import xml.etree.ElementTree as ET 
    25 
    27 
    26 user_agent = 'downloadkit.py script'
    28 user_agent = 'downloadkit.py script'
    27 headers = { 'User-Agent' : user_agent }
    29 headers = { 'User-Agent' : user_agent }
    28 top_level_url = "http://developer.symbian.org"
    30 top_level_url = "http://developer.symbian.org"
    29 download_list = []
    31 download_list = []
   160 	help.close()
   162 	help.close()
   161 	return False
   163 	return False
   162 
   164 
   163 def orderResults(x,y) :
   165 def orderResults(x,y) :
   164 	def ranking(name) :
   166 	def ranking(name) :
       
   167 		# 0th = release_metadata
       
   168 		if re.match(r"release_metadata", name):
       
   169 			return 0000;
   165 		# 1st = release_metadata, build_BOM.zip (both small things!)
   170 		# 1st = build_BOM.zip (a small thing!)
   166 		if re.match(r"(build_BOM|release_metadata)", name):
   171 		if re.match(r"build_BOM", name):
   167 			return 1000;
   172 			return 1000;
   168 		# 2nd = tools, binaries (required for execution and compilation)
   173 		# 2nd = tools, binaries (required for execution and compilation)
   169 		elif re.match(r"(binaries_|tools_)", name):
   174 		elif re.match(r"(binaries_|tools_)", name):
   170 			return 2000;
   175 			return 2000;
   171 		# 3rd = rnd binaries, binary patches
   176 		# 3rd = rnd binaries, binary patches
   181 		return 10000;
   186 		return 10000;
   182 	xtitle = x['title']
   187 	xtitle = x['title']
   183 	ytitle = y['title']
   188 	ytitle = y['title']
   184 	return cmp(ranking(xtitle)+cmp(xtitle,ytitle), ranking(ytitle))
   189 	return cmp(ranking(xtitle)+cmp(xtitle,ytitle), ranking(ytitle))
   185 
   190 
       
   191 def md5_checksum(filename):
       
   192 	MD5_BLOCK_SIZE = 128 * 1024
       
   193 	md5 = hashlib.md5()
       
   194 	try:
       
   195 		file = open(filename,"rb")
       
   196 	except IOError:
       
   197 		print "Terminating script: Unable to open %S" % filename
       
   198 		sys.exit()
       
   199 	while True:
       
   200 		data = file.read(MD5_BLOCK_SIZE)
       
   201 		if not data:
       
   202 			break
       
   203 		md5.update(data)
       
   204 	file.close()
       
   205 	return md5.hexdigest().upper()
       
   206 
       
   207 checksums = {}
       
   208 def parse_release_metadata(filename):
       
   209 	if os.path.exists(filename):
       
   210 		tree = ET.parse(filename)
       
   211 		iter = tree.getiterator('package')
       
   212 		for element in iter:
       
   213 			if element.keys():
       
   214 				file = element.get("name")
       
   215 				md5 = element.get("md5checksum")
       
   216 				checksums[file] = md5.upper()
       
   217 
   186 def download_file(filename,url):
   218 def download_file(filename,url):
   187 	global options
   219 	global options
   188 	if options.dryrun :
   220 	global checksums
       
   221 	if os.path.exists(filename):
       
   222 		if filename in checksums:
       
   223 			print 'Checking existing ' + filename
       
   224 			file_checksum = md5_checksum(filename)
       
   225 			if file_checksum == checksums[filename]:
       
   226 				if options.progress:
       
   227 					print '- OK ' + filename
       
   228 				return True
       
   229 
       
   230 	if options.dryrun and not re.match(r"release_metadata", filename):
   189 		global download_list
   231 		global download_list
   190 		download_info = "download %s %s" % (filename, url)
   232 		download_info = "download %s %s" % (filename, url)
   191 		download_list.append(download_info)
   233 		download_list.append(download_info)
   192 		return True
   234 		return True
   193 	
   235 
   194 	print 'Downloading ' + filename
   236 	print 'Downloading ' + filename
   195 	global headers
   237 	global headers
   196 	req = urllib2.Request(url, None, headers)
   238 	req = urllib2.Request(url, None, headers)
   197 	
   239 	
       
   240 	CHUNK = 128 * 1024
       
   241 	size = 0
       
   242 	filesize = -1
       
   243 	start_time = time.time()
       
   244 	last_time = start_time
       
   245 	last_size = size
   198 	try:
   246 	try:
   199 		response = urllib2.urlopen(req)
   247 		response = urllib2.urlopen(req)
   200 		CHUNK = 128 * 1024
   248 		chunk = response.read(CHUNK)
   201 		size = 0
   249 		if chunk.find('<div id="sign_in_box">') != -1:
   202 		filesize = -1
   250 			# our urllib2 cookies have gone awol - login again
   203 		last_time = time.time()
   251 			login(False)
   204 		last_size = size
   252 			req = urllib2.Request(url, None, headers)
       
   253 			response = urllib2.urlopen(req)
       
   254 			chunk = response.read(CHUNK)
       
   255 			if chunk.find('<div id="sign_in_box">') != -1:
       
   256 				# still broken - give up on this one
       
   257 				print "*** ERROR trying to download %s" % (filename)
       
   258 				return False
       
   259 		info = response.info()
       
   260 		if 'Content-Length' in info:
       
   261 			filesize = int(info['Content-Length'])
       
   262 		else:
       
   263 			print "*** HTTP response did not contain 'Content-Length' when expected"
       
   264 			print info
       
   265 			return False
       
   266 
       
   267 	except urllib2.HTTPError, e:
       
   268 		print "HTTP Error:",e.code , url
       
   269 		return False
       
   270 	except urllib2.URLError, e:
       
   271 		print "URL Error:",e.reason , url
       
   272 		return False
       
   273 
       
   274 	# we are now up and running, and chunk contains the start of the download
       
   275 	
       
   276 	try:
   205 		fp = open(filename, 'wb')
   277 		fp = open(filename, 'wb')
       
   278 		md5 = hashlib.md5()
   206 		while True:
   279 		while True:
   207 			chunk = response.read(CHUNK)
       
   208 			if not chunk: break
       
   209 			if size == 0 and chunk.find('<div id="sign_in_box">') != -1:
       
   210 				# our urllib2 cookies have gone awol - login again
       
   211 				login(False)
       
   212 				req = urllib2.Request(url, None, headers)
       
   213 				response = urllib2.urlopen(req)
       
   214 				chunk = response.read(CHUNK)
       
   215 				if chunk.find('<div id="sign_in_box">') != -1:
       
   216 					# still broken - give up on this one
       
   217 					print "*** ERROR trying to download %s" % (filename)
       
   218 					break;
       
   219 			if size == 0:
       
   220 				info = response.info()
       
   221 				if 'Content-Length' in info:
       
   222 					filesize = int(info['Content-Length'])
       
   223 				else:
       
   224 					print "*** HTTP response did not contain 'Content-Length' when expected"
       
   225 					print info
       
   226 					break
       
   227 			fp.write(chunk)
   280 			fp.write(chunk)
       
   281 			md5.update(chunk)
   228 			size += len(chunk)
   282 			size += len(chunk)
   229 			now = time.time()
   283 			now = time.time()
   230 			if options.progress and now-last_time > 20:
   284 			if options.progress and now-last_time > 20:
   231 				rate = (size-last_size)/(now-last_time)
   285 				rate = (size-last_size)/(now-last_time)
   232 				estimate = ""
   286 				estimate = ""
   238 						remaining = "%d seconds" % remaining_seconds
   292 						remaining = "%d seconds" % remaining_seconds
   239 					estimate = "- %d%% est. %s" % ((100*size/filesize), remaining)
   293 					estimate = "- %d%% est. %s" % ((100*size/filesize), remaining)
   240 				print "- %d Kb (%d Kb/s) %s" % (size/1024, (rate/1024)+0.5, estimate)
   294 				print "- %d Kb (%d Kb/s) %s" % (size/1024, (rate/1024)+0.5, estimate)
   241 				last_time = now
   295 				last_time = now
   242 				last_size = size
   296 				last_size = size
       
   297 			chunk = response.read(CHUNK)
       
   298 			if not chunk: break
       
   299 
   243 		fp.close()
   300 		fp.close()
   244 		if options.progress:
   301 		if options.progress:
   245 			now = time.time()
   302 			now = time.time()
   246 			print "- Completed %s - %d Kb in %d seconds" % (filename, (filesize/1024)+0.5, now-last_time)
   303 			print "- Completed %s - %d Kb in %d seconds" % (filename, (filesize/1024)+0.5, now-start_time)
   247 
   304 
   248 	#handle errors
   305 	#handle errors
   249 	except urllib2.HTTPError, e:
   306 	except urllib2.HTTPError, e:
   250 		print "HTTP Error:",e.code , url
   307 		print "HTTP Error:",e.code , url
   251 		return False
   308 		return False
   252 	except urllib2.URLError, e:
   309 	except urllib2.URLError, e:
   253 		print "URL Error:",e.reason , url
   310 		print "URL Error:",e.reason , url
   254 		return False
   311 		return False
       
   312 
       
   313 	if filename in checksums:
       
   314 		download_checksum = md5.hexdigest().upper()
       
   315 		if download_checksum != checksums[filename]:
       
   316 			print '- WARNING: %s checksum does not match' % filename
       
   317 
   255 	return True
   318 	return True
   256 
   319 
   257 def downloadkit(version):	
   320 def downloadkit(version):	
   258 	global headers
   321 	global headers
   259 	global options
   322 	global options
   296 
   359 
   297 		# unzip the file (if desired)
   360 		# unzip the file (if desired)
   298 		if re.match(r"patch", filename):
   361 		if re.match(r"patch", filename):
   299 			complete_outstanding_unzips()	# ensure that the thing we are patching is completed first
   362 			complete_outstanding_unzips()	# ensure that the thing we are patching is completed first
   300 			
   363 			
   301 		if re.match(r"(bin|tools).*\.zip", filename):
   364 		if re.match(r"release_metadata", filename):
       
   365 			parse_release_metadata(filename)	# read the md5 checksums etc
       
   366 		elif re.match(r"(bin|tools).*\.zip", filename):
   302 			schedule_unzip(filename, 1, 0)   # unzip once, don't delete
   367 			schedule_unzip(filename, 1, 0)   # unzip once, don't delete
   303 		elif re.match(r"src_.*\.zip", filename):
   368 		elif re.match(r"src_.*\.zip", filename):
   304 			schedule_unzip(filename, 1, 1)   # zip of zips, delete top level
   369 			schedule_unzip(filename, 1, 1)   # zip of zips, delete top level
   305 		elif re.match(r"build_BOM.zip", filename):
   370 		elif re.match(r"build_BOM.zip", filename):
   306 			schedule_unzip(filename, 1, 1)   # unpack then delete zip as it's not needed again
   371 			schedule_unzip(filename, 1, 1)   # unpack then delete zip as it's not needed again
   308 	# wait for the unzipping threads to complete
   373 	# wait for the unzipping threads to complete
   309 	complete_outstanding_unzips()  
   374 	complete_outstanding_unzips()  
   310 
   375 
   311 	return 1
   376 	return 1
   312 
   377 
   313 parser = OptionParser(version="%prog 0.6.1", usage="Usage: %prog [options] version")
   378 parser = OptionParser(version="%prog 0.7", usage="Usage: %prog [options] version")
   314 parser.add_option("-n", "--dryrun", action="store_true", dest="dryrun",
   379 parser.add_option("-n", "--dryrun", action="store_true", dest="dryrun",
   315 	help="print the files to be downloaded, the 7z commands, and the recommended deletions")
   380 	help="print the files to be downloaded, the 7z commands, and the recommended deletions")
   316 parser.add_option("--nosrc", action="store_true", dest="nosrc",
   381 parser.add_option("--nosrc", action="store_true", dest="nosrc",
   317 	help="Don't download any of the source code available directly from Mercurial")
   382 	help="Don't download any of the source code available directly from Mercurial")
   318 parser.add_option("--nounzip", action="store_true", dest="nounzip",
   383 parser.add_option("--nounzip", action="store_true", dest="nounzip",