#!/usr/bin/perl
use IO::Socket;
use Getopt::Long;
my $target_url; #target url for the roadmap
my $tdomain; #tag for the domain to be use in csv file
my $csvfile; #output csv file name
my $authon= ''; #does it require authorisation? default is false
my $count_target; #this value is used to pass a string to match and count on each package backlog
my $ispackage;
my $splitbklogs;
my $summaryheader="ID\tPackage\tFeatures\tFormat\tHttp\n" ;
my $newtdformat = 0;
my @blist = ("Tracking_Package_features","Kernel_EPLization_Backlog",
"Package_Backlog_Q","Wishlist","Test_Package","Backlog_Details");
sub blacklist
{
($name)=@_;
foreach(@blist) {
if ( $name =~ m/$_/sg) { print "WARNING - $name is blacklisted\n"; return 1;}
}
return 0;
}
sub getpage
{
#arguments
($page,$host,$auth,$myfile)=@_;
#output file
open ( outputfile, ">".$myfile);
$port = "http(80)";
$getmess = "GET " . $page ." HTTP/1.1\n" . $auth;
print "INFO - sending message - $getmess\n";
print outputfile "$getmess\n\n";
$sock = IO::Socket::INET->new
(
PeerAddr => $host, PeerPort => $port, Proto => 'tcp',
) ;
print $sock "$getmess\n\n";
while(<$sock>) {
print outputfile $_;
}
close ($sock);
close (outputfile);
}
sub prntfeatures
{
($release,$package,$features,$myfile,$domain)=@_;
$release =~ s/\\//sg;
$csvdel ="\",\"";
$invcoma ="\"";
if ($newtdformat) {
$package =~ s/backlog//sgi;
print $myfile "$invcoma $release $csvdel $domain $csvdel $package $csvdel $myfeat $invcoma\n";
} else {
$features = $features."<dt";
while ( $features =~ /dt\>(.*?)\<\/dt(.*?)\<dt/sg ){
$myfeat = $1;
$subfeat =$2;
$myfeat =~ s/\n/ /sg;
pos($features) = pos($features) -2;
$mystr="";
while ( $subfeat =~ /\<dd\>(.*?)\<\/dd\>/sg) {
$mysubfeat = $mysubfeat.$mystr.$1;
$mystr = " & ";
}
undef $mystr;
$mysubfeat =~ s/,/ /sg;
$mysubfeat =~ s/\n//sg;
$mysubfeat =~ s/\<.*?\>//sg;
print $myfile "$invcoma $release $csvdel $domain $csvdel $package $csvdel $myfeat $csvdel $csvdel $csvdel $mysubfeat $invcoma\n";
$mysubfeat = "";
}
}
}
sub loadfile
{
$/ = " ";
#arguments
($myfile)=@_;
open ( inputfile, "<".$myfile);
my $contents = do { local $/; <inputfile> };
close(inputfile);
return $contents;
}
sub td_roadmap
{
#arguments
($infile,$outfile,$domain,@releases)=@_;
$roadmap=loadfile $infile;
open ( outputfile, ">>".$outfile);
if ($newtdformat) {
print "Processing new TD roadmap format\n";
if ($roadmap =~ m /Contents\<\/h2\>.*?\<\/table/sg) { $roadmap =$';}
foreach (@releases) {
$exp=$_." Roadmap";
if ($roadmap =~ m /($exp)/sg) {
print "PASS - Found entry for $_ \n";
$relroad =$';
if ($roadmap =~ m /table\>(.*?)\<\/table/sg) { $relroad =$1;}
$relroad =~ s/\n//sg;
$skipfirst =1; #skipping the header of the table
while ($relroad =~ m/\<tr\>(.*?)\<\/tr(.*)/sg){
$myfeat=$1;
$relroad =$2;
if ($skipfirst) {$skipfirst=0;next;}
$package="";
if ($myfeat =~ m/title\=\"(.*?)\"/sg) { $package=$1; } #looking for package name
$myfeat=~ s/\<\/td\>\<td\>/\",\"/sg;
$myfeat=~ s/\<.*?\>//sg;
if ($myfeat =~m/[A-z]/sg ){prntfeatures($_,$package,$myfeat,outputfile,$domain);}
}
}
}
} else {
foreach (@releases) {
$exp="\\<h2\\>.*?\\>".$_;
if ($roadmap =~ m /($exp)/sg) {
print "PASS - Found entry for $_ \n";
$relroad =$';
if ($relroad =~ m /(.*?)\<h2/sg) { $relroad =$1;}
$i=0;
while ($relroad=~ m/\<h3\>.*\>(.*?)\<.*<\/h3/g) {
$package = $1;
$ppos[$i]= pos($relroad);
$pname[$i]= $package;
$i++;
}
for ( $i=0;$i<($#ppos); $i++){
$features= substr ($relroad, $ppos[$i],$ppos[$i+1]-$ppos[$i]);
prntfeatures($_,$pname[$i],$features,outputfile,$domain);
}
$features= substr ($relroad, $ppos[$i]);
prntfeatures($_,$pname[$i],$features,outputfile,$domain);
@ppos ="";
@pname ="";
undef ($features);
}
}
}
close (outputfile);
}
sub parse_category {
#arguments
($infile)=@_;
my @mylink;
$mypage=loadfile $infile;
$i=0;
if ( $mypage =~ m/Pages in category(.*)\<\/table/sg) {
print "INFO - Category page found\n";
$mypage = $1;
while ($mypage =~ m /\<a href\=\"(\/wiki\/index\.php\/.*?)\"/g) {
$mylink[$i] = $1;
$i++;
}
print "INFO - Found $i items in the category page\n"
}
return @mylink;
}
sub bklog_domain {
#argument
($mytechdomian)=@_;
$mytechdomian =~s/\)//sg;
$mytechdomian =~s/\(//sg;
$domaininfo=loadfile ("package_domains.csv");
if ($domaininfo =~ m/$mytechdomian.*?\{(.*?)\}/sgi ) {
return $1;
}
print "ERROR - domain not found for $mytechdomian\n";
return "orphan";
}
sub parse_bklog {
#arguments
($infile,$outfile,$id)=@_;
$mypkg=loadfile $infile;
#list if the bklog has been ported to the new bugzilla based format
$headerformat= "wiki_format";
open ( outputfile, ">>".$outfile);
open ( soutputfile, ">>"."summary_".$outfile);
if ($mypkg =~ m/index\.php\/(.*?) HTTP/sg) {
$pagename = $1;
if ($splitbklogs) {
$whichtd = bklog_domain($pagename);
open ( tdoutputfile, ">>".$whichtd. "_".$outfile);
}
if (blacklist($pagename)) {
close (outputfile);
close (soutputfile);
if ($splitbklogs) { close (tdoutputfile);}
return 0;
}
print "INFO -Processing Package $pagename \n";
$i=0;
$found_counter =0;
if ($mypkg =~m/class\=\"bugzilla sortable\"/sg ) { $headerformat="autobug_format"; }
while ($mypkg =~ m/\<tr.*?\>(.*?)\<\/tr/sg) {
$myheader= $&;
if ($myheader =~ m/style=\"background-color\:/sg) {
next;
}
$myfeat= $1;
$myfeat =~ s/\<\/td\>/\t/sg;
$myfeat =~ s/\<.*?\>//sg;
$myfeat =~ s/\n//sg;
if ($myfeat =~ m/IDPStatus/sg) { #header for bugzilla mediawiki plugin
next;
}
if ($myfeat =~ m/[A-z]/sg and not $myfeat =~ m/\<\;etc/sg and
not $myfeat =~ m/\<\;Feature/sg and not $myfeat =~ m/Item not available/sg) {
print outputfile "$pagename\t$myfeat\n";
if ($splitbklogs) { print tdoutputfile "$pagename\t$myfeat\n";}
# print "matching $myfeat with $count_target\n" ;
if ($myfeat =~ m/$count_target/sg) {$found_counter++;}
$i++;
}
}
if ($count_target){
$mycount=$i."\t".$found_counter;
} else {
$mycount=$i;
}
if ($splitbklogs) {
print soutputfile "$id\t$pagename\t$mycount\t$headerformat\t$whichtd\thttp://developer.symbian.org/wiki/index.php/$pagename\n";
} else {
print soutputfile "$id\t$pagename\t$mycount\t$headerformat\thttp://developer.symbian.org/wiki/index.php/$pagename\n";
}
}
close (outputfile);
close (soutputfile);
if ($splitbklogs) { close (tdoutputfile);}
}
#help print
sub printhelp
{
print "\n\n version 1.1
\ngettd.pl -t=url -d=domain \n\nRequired parameters for Technology Roadmaps:\n\t -t url containing the technology domain roadmap\n\t -d the technology domain name
\n\nOptional Parmeters for Technology Roadmaps\n\t-new if the roadmap has the new wiki format
\n\nRequired Parameters for Package backlogs\n\t-p for package backlog analysis. just run gettd.pl -p
\n\nOptional Pararmeters for Package backlogs\n\t -compare [f1] [f2] compares two package summary files for changes ignores order
\n\t -split splits the content of the backlog output into technology domains. requires package_domains.csv file with mapping details
\n\t -count=regexp counts the times that a package backlog line matches the regexp, the results are output to the summary file
\n\nCommonOptional parameters\n\t-o filename ,the output is logged into the output.csv file by default\n\t-h for help
\n\t recommend to run under cygwin environment and perl version v5.10.0 \n
\n\t pages blacklisted for package backlogs are @blist\n";
exit;
}
#compare bklogs
sub compare_bklogs {
#arguments
(@bklogs)=@_;
if (not $#bklogs == 1) { printhelp;}
$cmd ="cut -f 2,3 ". $bklogs[0] . " | sort -u > tmp1.txt";
system($cmd);
$cmd ="cut -f 2,3 ". $bklogs[1] . " | sort -u > tmp2.txt";
system($cmd);
exec ("diff tmp1.txt tmp2.txt | grep '[<|>]'");
system("rm temp*.txt");
exit;
}
#process command line options
sub cmd_options
{
my $help;
my @compare;
GetOptions('h' => \$help,'t=s'=> \$target_url, 'd=s' => \$tdomain , 'o=s' => \$csvfile,
'a' => \$authon , 'p' => \$ispackage, 'compare=s{2}' =>\@compare, 'new' => \$isnewformat,
'split' => \$splitbklogs, 'count=s' => \$count_target);
if (@compare) {
compare_bklogs @compare;
}
if ($count_target) {
print "INFO - Seaching for $count_target\n";
}
if ($help) {
printhelp;
}
if ($ispackage) {
$tdomain =" ";
$target_url = "http://developer.symbian.org/wiki/index.php/Category:Package_Backlog";
}
if ($isnewformat){
$newtdformat = 1;
}
if ( not $target_url) {
print "ERROR-missing arguments target url\n";
printhelp;
}
if (not $tdomain){
print "ERROR-missing arguments domain level\n";
printhelp;
}
print "\nINFO-downloading $target_url with label $tdomain\n";
if (not $csvfile) {
if (not $ispackage) {
$csvfile="output.csv";
} else {
$csvfile="output.txt";
system ("rm *output.txt");
}
}
print "\nINFO-output recorded in $csvfile \n";
}
#main
$/ = " ";
$host1 = "developer.symbian.org";
cmd_options();
if ($authon) {
#file containing login details from http cookie
$mycookie = loadfile("mycookie.txt");
$auth = "Cookie: " . $mycookie ;
}
if ($ispackage) {
getpage($target_url, $host1, $auth, "debug.txt");
@bklog = parse_category("debug.txt");
$j=0;
foreach (@bklog) {
getpage("http://".$host1.$_, $host1, $auth, "pkg".$j.".txt");
parse_bklog ("pkg".$j.".txt",$csvfile, $j);
$j++;
}
} else {
#foundation releases - add as required
@releases=("Symbian\\^2","Symbian\\^3","Symbian\\^4");
getpage($target_url, $host1, $auth, "debug.txt");
td_roadmap("debug.txt" , $csvfile, $tdomain ,@releases);
}