Improved the script to show tech domain and package owner
author Sebastian Brannstrom <sebastianb@symbian.org>
Thu, 26 Nov 2009 17:51:27 +0000
changeset 16 b31eb4818219
parent 15 4b202623a507
child 17 ba7ab4ab5087
Improved the script to show tech domain and package owner
scripts/python/findpackage/findpackage.py
--- a/scripts/python/findpackage/findpackage.py	Fri Nov 20 16:01:12 2009 +0000
+++ b/scripts/python/findpackage/findpackage.py	Thu Nov 26 17:51:27 2009 +0000
@@ -1,178 +1,248 @@
-# findpackage.py - finds which Symbian package contains a file (if any) by searching opengrok
-
-import urllib2
-import urllib
-import os.path
-import cookielib
-import sys
-import getpass
-from BeautifulSoup import BeautifulSoup
-
-user_agent = 'findpackage.py script'
-headers = { 'User-Agent' : user_agent }
-top_level_url = "http://developer.symbian.org"
-
-COOKIEFILE = 'cookies.lwp'
-# the path and filename to save your cookies in
-
-# importing cookielib worked
-urlopen = urllib2.urlopen
-Request = urllib2.Request
-cj = cookielib.LWPCookieJar()
-
-# This is a subclass of FileCookieJar
-# that has useful load and save methods
-if os.path.isfile(COOKIEFILE):
-	cj.load(COOKIEFILE)
-	
-# Now we need to get our Cookie Jar
-# installed in the opener;
-# for fetching URLs
-opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
-urllib2.install_opener(opener)
-
-def login():
-	loginurl = 'https://developer.symbian.org/main/user_profile/login.php'
-	
-	print >> sys.stderr, 'username: ',
-	username=sys.stdin.readline().strip()
-	password=getpass.getpass()
-	
-	values = {'username' : username,
-	          'password' : password,
-	          'submit': 'Login'}
-	          
-	headers = { 'User-Agent' : user_agent }
-	
-	
-	data = urllib.urlencode(values)
-	req = urllib2.Request(loginurl, data, headers)
-
-	response = urllib2.urlopen(req)
-	doc=response.read()      
-
-	if doc.find('Please try again') != -1:
-		print >> sys.stderr, 'Login failed'
-		return False
-	
-	cj.save(COOKIEFILE) 
-	return True
-
-def findpackageforlibrary(filename, project):
-
-	dotpos = filename.find('.')
-	
-	if dotpos != -1:
-		searchterm = filename[0:dotpos]
-	else:
-		searchterm = filename
-		
-	searchurl = 'https://developer.symbian.org/xref/sfl/search?q="TARGET+%s"&defs=&refs=&path=&hist=&project=%%2F%s'
-	url = searchurl % (searchterm, project)
-	req = urllib2.Request(url)
-	
-	response = urllib2.urlopen(req)
-	
-	doc=response.read()
-	
-	if doc.find('Restricted access') != -1:
-		if(login()):
-			# try again after login
-			response = urllib2.urlopen(req)
-			doc=response.read()
-		else:
-			return False
-			
-	
-	# BeatifulSoup chokes on some javascript, so we cut away everything before the <body>
-	try:
-		bodystart=doc.find('<body>')
-		doc = doc[bodystart:]
-	except:
-		pass
-			
-	soup=BeautifulSoup(doc)
-	
-	# let's hope the HTML format never changes...
-	results=soup.findAll('div', id='results')
-	pkgname=''
-	try:
-		temp=results[0].a.string
-		fspos=temp.find('sf')
-		temp=temp[fspos+3:]
-		pkgpos=temp.find('/')
-		temp=temp[pkgpos+1:]
-	
-		endpkgpos=temp.find('/')
-		pkgname=temp[0:endpkgpos]
-	except:
-		print 'error: file \'%s\' not found in opengrok' % filename
-	else:
-		print 'first package with target %s: %s' % (searchterm,pkgname)
-	
-	return True
-			
-def findpackageforheader(filename, project):
-	searchterm=filename
-	searchurl = 'https://developer.symbian.org/xref/sfl/search?q=&defs=&refs=&path=%s&hist=&project=%%2F%s'
-	url = searchurl % (searchterm, project)
-
-	req = urllib2.Request(url)
-	
-	response = urllib2.urlopen(req)
-	
-	doc=response.read()
-	
-	if doc.find('Restricted access') != -1:
-		if(login()):
-			# try again after login
-			response = urllib2.urlopen(req)
-			doc=response.read()
-		else:
-			return False
-			
-	
-	# BeatifulSoup chokes on some javascript, so we cut away everything before the <body>
-	try:
-		bodystart=doc.find('<body>')
-		doc = doc[bodystart:]
-	except:
-		pass
-			
-	soup=BeautifulSoup(doc)
-	
-	# let's hope the HTML format never changes...
-	results=soup.findAll('div', id='results')
-	pkgname=''
-	try:
-		temp=results[0].a.string
-		fspos=temp.find('sf')
-		temp=temp[fspos+3:]
-		pkgpos=temp.find('/')
-		temp=temp[pkgpos+1:]
-	
-		endpkgpos=temp.find('/')
-		pkgname=temp[0:endpkgpos]
-	except:
-		print 'error: file \'%s\' not found in opengrok' % filename
-	else:
-		print 'package:', pkgname
-	
-	return True
-		
-
-if len(sys.argv) < 2:
-	print 'usage: findpackage.py <filename> [project]'
-	exit()
-
-filename = sys.argv[1]
-
-if len(sys.argv) == 3:
-	project = sys.argv[2]
-else:
-	project = 'Symbian2'
-
-if filename.endswith('.lib') or filename.endswith('.dll'):
-	findpackageforlibrary(filename, project)
-else:
-	findpackageforheader(filename, project)
+#!/usr/bin/python
+# findpackage.py - finds which Symbian package contains a file (if any) by searching opengrok
+
+import urllib2
+import urllib
+import os.path
+import cookielib
+import sys
+import getpass
+from BeautifulSoup import BeautifulSoup
+
+user_agent = 'findpackage.py script'
+headers = { 'User-Agent' : user_agent }
+top_level_url = "http://developer.symbian.org"
+
+COOKIEFILE = 'cookies.lwp'
+# the path and filename to save your cookies in
+
+# importing cookielib worked
+urlopen = urllib2.urlopen
+Request = urllib2.Request
+cj = cookielib.LWPCookieJar()
+
+# This is a subclass of FileCookieJar
+# that has useful load and save methods
+if os.path.isfile(COOKIEFILE):
+	cj.load(COOKIEFILE)
+	
+# Now we need to get our Cookie Jar
+# installed in the opener;
+# for fetching URLs
+opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
+urllib2.install_opener(opener)
+
+def login():
+	loginurl = 'https://developer.symbian.org/main/user_profile/login.php'
+	
+	print >> sys.stderr, 'username: ',
+	username=sys.stdin.readline().strip()
+	password=getpass.getpass()
+	
+	values = {'username' : username,
+	          'password' : password,
+	          'submit': 'Login'}
+	          
+	headers = { 'User-Agent' : user_agent }
+	
+	
+	data = urllib.urlencode(values)
+	req = urllib2.Request(loginurl, data, headers)
+
+	response = urllib2.urlopen(req)
+	doc=response.read()      
+
+	if doc.find('Please try again') != -1:
+		print >> sys.stderr, 'Login failed'
+		return False
+	
+	cj.save(COOKIEFILE) 
+	return True
+
+# we find the package by searching opengrok for the file and scraping the results page
+def findpackageforlibrary(filename, project):
+
+	dotpos = filename.find('.')
+	
+	if dotpos != -1:
+		searchterm = filename[0:dotpos]
+	else:
+		searchterm = filename
+		
+	searchurl = 'https://developer.symbian.org/xref/sfl/search?q="TARGET+%s"&defs=&refs=&path=&hist=&project=%%2F%s'
+	url = searchurl % (searchterm, project)
+	req = urllib2.Request(url)
+	
+	response = urllib2.urlopen(req)
+	
+	doc=response.read()
+	
+	if doc.find('Restricted access') != -1:
+		if(login()):
+			# try again after login
+			response = urllib2.urlopen(req)
+			doc=response.read()
+		else:
+			return ''
+			
+	
+	# BeautifulSoup chokes on some javascript, so we cut away everything before the <body>
+	try:
+		bodystart=doc.find('<body>')
+		doc = doc[bodystart:]
+	except:
+		pass
+			
+	soup=BeautifulSoup(doc)
+	
+	# let's hope the HTML format never changes...
+	results=soup.findAll('div', id='results')
+	pkgname=''
+	try:
+		temp=results[0].a.string
+		fspos=temp.find('sf')
+		if fspos == -1:
+			raise
+		temp=temp[fspos+3:]
+		pkgpos=temp.find('/')
+		if pkgpos == -1:
+			raise
+		temp=temp[pkgpos+1:]
+	
+		endpkgpos=temp.find('/')
+		if endpkgpos == -1:
+			raise
+		pkgname=temp[0:endpkgpos]
+	except:
+		print 'error: file \'%s\' not found in opengrok' % filename
+	else:
+		print 'first package with target %s: %s' % (searchterm,pkgname)
+	
+	return pkgname
+			
+def findpackageforheader(filename, project):
+	searchterm=filename
+	searchurl = 'https://developer.symbian.org/xref/sfl/search?q=&defs=&refs=&path=%s&hist=&project=%%2F%s'
+	url = searchurl % (searchterm, project)
+
+	req = urllib2.Request(url)
+	
+	response = urllib2.urlopen(req)
+	
+	doc=response.read()
+	
+	if doc.find('Restricted access') != -1:
+		if(login()):
+			# try again after login
+			response = urllib2.urlopen(req)
+			doc=response.read()
+		else:
+			return ''
+			
+	
+	# BeautifulSoup chokes on some javascript, so we cut away everything before the <body>
+	try:
+		bodystart=doc.find('<body>')
+		doc = doc[bodystart:]
+	except:
+		pass
+			
+	soup=BeautifulSoup(doc)
+	
+	# let's hope the HTML format never changes...
+	results=soup.findAll('div', id='results')
+	pkgname=''
+	try:
+		temp=results[0].a.string
+		fspos=temp.find('sf')
+		temp=temp[fspos+3:]
+		pkgpos=temp.find('/')
+		temp=temp[pkgpos+1:]
+	
+		endpkgpos=temp.find('/')
+		pkgname=temp[0:endpkgpos]
+		
+		if len(pkgname) == 0:
+			raise
+	except:
+		print 'error: file \'%s\' not found in opengrok' % filename
+	else:
+		print 'package:', pkgname
+	
+	return pkgname
+
+
+# we find the package owner by loading the bug-entry page (enter_bug.cgi) for this package and scraping its initial owner
+def findpackageowner(pkgname):
+	pkgurl = 'http://developer.symbian.org/bugs/enter_bug.cgi?product=%s'
+	url = pkgurl % pkgname
+	
+	req = urllib2.Request(url)
+	
+	response = urllib2.urlopen(req)
+	
+	doc=response.read()
+	
+	pos = doc.find('initialowners[0]')
+	
+	if pos != -1:
+		email=doc[pos+20:pos+220] # 200 chars must be enough
+		pos = email.find(';')
+		email = email[0:pos-1]
+		email = email.replace('\\x40', '@')
+		
+		return email
+
+# we find the tech domain by scraping the platform source index page; this breaks if the HTML changes
+def findpackagedomain(pkgname):
+	url = 'http://developer.symbian.org/main/source/platform/index.php'	
+
+	req = urllib2.Request(url)
+	
+	response = urllib2.urlopen(req)
+
+	doc=response.read()
+	
+	# BeautifulSoup chokes on some javascript, so we cut away everything before the <body>
+	try:
+		bodystart=doc.find('<body>')
+		doc = doc[bodystart:]
+	except:
+		pass
+			
+	soup=BeautifulSoup(doc)
+	
+		# let's hope the HTML format never changes...
+	results=soup.findAll('li')
+	
+	for result in results:
+		try:
+			temp = result.a.contents[0].lower()
+			if temp.find(pkgname) != -1:
+				return result.parent.parent.h3.a.contents[0]
+		except:
+			pass
+
+if len(sys.argv) < 2:
+	print 'usage: findpackage.py <filename> [project]'
+	exit()
+
+filename = sys.argv[1]
+
+
+if len(sys.argv) == 3:
+	project = sys.argv[2]
+else:
+	project = 'Symbian2'
+
+if filename.endswith('.lib') or filename.endswith('.dll'):
+	pkgname=findpackageforlibrary(filename, project)
+else:
+	pkgname=findpackageforheader(filename, project)
+
+if len(pkgname) > 0:
+	domain=findpackagedomain(pkgname)
+	owner=findpackageowner(pkgname)
+	
+	print "domain:", domain
+	print "owner:", owner
\ No newline at end of file