scripts/python/findpackage/findpackage.py
author Sebastian Brannstrom <sebastianb@symbian.org>
Thu, 26 Nov 2009 17:51:27 +0000
changeset 16 b31eb4818219
parent 12 d2f4d301e581
child 18 e2c612a7088c
permissions -rw-r--r--
Improved the script to show tech domain and package owner

#!/usr/bin/python
# findpackage.py - finds which Symbian package contains a file (if any) by searching opengrok

import urllib2
import urllib
import os.path
import cookielib
import sys
import getpass
from BeautifulSoup import BeautifulSoup

# Identification sent along with the login request.
user_agent = 'findpackage.py script'
headers = { 'User-Agent' : user_agent }
top_level_url = "http://developer.symbian.org"

# the path and filename to save your cookies in
COOKIEFILE = 'cookies.lwp'

# Short aliases for the urllib2 entry points. The functions in this file call
# urllib2 directly; the aliases are kept so the module's names stay unchanged.
urlopen = urllib2.urlopen
Request = urllib2.Request

# LWPCookieJar is a subclass of FileCookieJar
# that has useful load and save methods
cj = cookielib.LWPCookieJar()

# Reuse the cookies of an earlier run, if any. A damaged or unreadable cookie
# file must not stop the script: we warn, start with an empty jar, and the
# file gets rewritten by the next successful login.
# (cookielib.LoadError is a subclass of IOError, so this covers both.)
if os.path.isfile(COOKIEFILE):
	try:
		cj.load(COOKIEFILE)
	except IOError:
		print >> sys.stderr, 'warning: ignoring cookie file %s: %s' % (COOKIEFILE, sys.exc_info()[1])

# Now we need to get our Cookie Jar
# installed in the opener;
# for fetching URLs
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
urllib2.install_opener(opener)

def login():
	"""Ask for a username and password, log in to developer.symbian.org and save the cookies.

	The username prompt is written to stderr and the name is read from stdin;
	the password is read with getpass.

	Returns True when the login was accepted (the cookie jar is then saved to
	COOKIEFILE), False when the reply contains 'Please try again'.
	Errors raised by urllib2.urlopen are not caught here.
	"""
	loginurl = 'https://developer.symbian.org/main/user_profile/login.php'

	# the trailing comma keeps the cursor on the prompt line
	print >> sys.stderr, 'username: ',
	username = sys.stdin.readline().strip()
	password = getpass.getpass()

	values = {'username' : username,
	          'password' : password,
	          'submit': 'Login'}

	# passing data makes this a POST; the module-level headers carry our User-Agent
	data = urllib.urlencode(values)
	req = urllib2.Request(loginurl, data, headers)

	response = urllib2.urlopen(req)
	try:
		doc = response.read()
	finally:
		# always release the connection, even if reading fails
		response.close()

	# the site answers a rejected login with a page asking to try again
	if doc.find('Please try again') != -1:
		print >> sys.stderr, 'Login failed'
		return False

	cj.save(COOKIEFILE)
	return True

# we find the package by searching in opengrok for the file, and scrape the output
def findpackageforlibrary(filename, project):

	dotpos = filename.find('.')
	
	if dotpos != -1:
		searchterm = filename[0:dotpos]
	else:
		searchterm = filename
		
	searchurl = 'https://developer.symbian.org/xref/sfl/search?q="TARGET+%s"&defs=&refs=&path=&hist=&project=%%2F%s'
	url = searchurl % (searchterm, project)
	req = urllib2.Request(url)
	
	response = urllib2.urlopen(req)
	
	doc=response.read()
	
	if doc.find('Restricted access') != -1:
		if(login()):
			# try again after login
			response = urllib2.urlopen(req)
			doc=response.read()
		else:
			return ''
			
	
	# BeatifulSoup chokes on some javascript, so we cut away everything before the <body>
	try:
		bodystart=doc.find('<body>')
		doc = doc[bodystart:]
	except:
		pass
			
	soup=BeautifulSoup(doc)
	
	# let's hope the HTML format never changes...
	results=soup.findAll('div', id='results')
	pkgname=''
	try:
		temp=results[0].a.string
		fspos=temp.find('sf')
		if fspos == -1:
			raise
		temp=temp[fspos+3:]
		pkgpos=temp.find('/')
		if pkgpos == -1:
			raise
		temp=temp[pkgpos+1:]
	
		endpkgpos=temp.find('/')
		if endpkgpos == -1:
			raise
		pkgname=temp[0:endpkgpos]
	except:
		print 'error: file \'%s\' not found in opengrok' % filename
	else:
		print 'first package with target %s: %s' % (searchterm,pkgname)
	
	return pkgname
			
def findpackageforheader(filename, project):
	searchterm=filename
	searchurl = 'https://developer.symbian.org/xref/sfl/search?q=&defs=&refs=&path=%s&hist=&project=%%2F%s'
	url = searchurl % (searchterm, project)

	req = urllib2.Request(url)
	
	response = urllib2.urlopen(req)
	
	doc=response.read()
	
	if doc.find('Restricted access') != -1:
		if(login()):
			# try again after login
			response = urllib2.urlopen(req)
			doc=response.read()
		else:
			return ''
			
	
	# BeatifulSoup chokes on some javascript, so we cut away everything before the <body>
	try:
		bodystart=doc.find('<body>')
		doc = doc[bodystart:]
	except:
		pass
			
	soup=BeautifulSoup(doc)
	
	# let's hope the HTML format never changes...
	results=soup.findAll('div', id='results')
	pkgname=''
	try:
		temp=results[0].a.string
		fspos=temp.find('sf')
		temp=temp[fspos+3:]
		pkgpos=temp.find('/')
		temp=temp[pkgpos+1:]
	
		endpkgpos=temp.find('/')
		pkgname=temp[0:endpkgpos]
		
		if len(pkgname) == 0:
			raise
	except:
		print 'error: file \'%s\' not found in opengrok' % filename
	else:
		print 'package:', pkgname
	
	return pkgname


# we find the package owner by attempting to raise a bug for this package
def findpackageowner(pkgname):
	"""Return the address found after 'initialowners[0]' on the bug entry page for pkgname.

	Returns None when the page does not contain the marker, or when no ';'
	follows it within the inspected window.
	Errors raised by urllib2.urlopen are not caught here.
	"""
	pkgurl = 'http://developer.symbian.org/bugs/enter_bug.cgi?product=%s'

	# escape the name for use in the query string; quote() needs a byte string
	# if the name contains non-ASCII characters
	if isinstance(pkgname, unicode):
		pkgname = pkgname.encode('utf-8')
	url = pkgurl % urllib.quote(pkgname)

	req = urllib2.Request(url)

	response = urllib2.urlopen(req)
	try:
		doc = response.read()
	finally:
		response.close()

	pos = doc.find('initialowners[0]')
	if pos == -1:
		return None

	# The value starts 20 characters after the start of the marker (the marker
	# itself is 16 long; the other 4 are presumably ' = "' - TODO confirm
	# against the page source). 200 chars must be enough for an address.
	email = doc[pos+20:pos+220]

	end = email.find(';')
	if end < 1:
		# no terminator in the window - do not return a chopped-up string
		return None

	# drop the character right before the ';' as well (presumably the closing quote)
	email = email[0:end-1]

	# the page spells '@' as the escape sequence \x40
	return email.replace('\\x40', '@')

# we find domain by scraping another page, god forbid they change the html
def findpackagedomain(pkgname):
	"""Return the heading under which pkgname is listed on the platform source page.

	Every <li> of the page is inspected; for the first one whose link text
	contains pkgname (compared case-insensitively, as a substring), the text
	of the <h3> link two levels above it is returned.

	Returns None if no list item matches.
	Errors raised by urllib2.urlopen are not caught here.
	"""
	url = 'http://developer.symbian.org/main/source/platform/index.php'

	req = urllib2.Request(url)

	response = urllib2.urlopen(req)
	try:
		doc = response.read()
	finally:
		response.close()

	# BeautifulSoup chokes on some javascript, so we cut away everything before the <body>.
	# Only cut when the tag is really there: find() returns -1 otherwise, and
	# doc[-1:] would throw away everything except the last character.
	bodystart = doc.find('<body>')
	if bodystart != -1:
		doc = doc[bodystart:]

	soup = BeautifulSoup(doc)

	# the link text is lower-cased below, so the name has to be as well
	wanted = pkgname.lower()

	# let's hope the HTML format never changes...
	for item in soup.findAll('li'):
		try:
			title = item.a.contents[0].lower()
			if title.find(wanted) != -1:
				return item.parent.parent.h3.a.contents[0]
		except (AttributeError, IndexError, TypeError):
			# list item without a link, without text, or without the
			# expected heading above it - not one of ours, keep looking
			continue

	return None

if len(sys.argv) < 2:
	print 'usage: findpackage.py <filename> [project]'
	exit()

filename = sys.argv[1]


if len(sys.argv) == 3:
	project = sys.argv[2]
else:
	project = 'Symbian2'

if filename.endswith('.lib') or filename.endswith('.dll'):
	pkgname=findpackageforlibrary(filename, project)
else:
	pkgname=findpackageforheader(filename, project)

if len(pkgname) > 0:
	domain=findpackagedomain(pkgname)
	owner=findpackageowner(pkgname)
	
	print "domain:", domain
	print "owner:", owner