diff -r ab800b3e45ab -r 13a99a13ca90 scripts/gettd.pl --- a/scripts/gettd.pl Wed Jan 27 10:52:27 2010 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,484 +0,0 @@ -#!/usr/bin/perl - - - -use IO::Socket; -use Getopt::Long; - - -my $target_url; #target url for the roadmap -my $tdomain; #tag for the domain to be use in csv file -my $csvfile; #output csv file name -my $authon= ''; #does it require authorisation? default is false - - -my $count_target; #this value is used to pass a string to match and count on each package backlog -my $ispackage; -my $splitbklogs; -my $summaryheader="ID\tPackage\tFeatures\tFormat\tHttp\n" ; -my $newtdformat = 0; -my @blist = ("Tracking_Package_features","Kernel_EPLization_Backlog", - "Package_Backlog_Q","Wishlist","Test_Package","Backlog_Details"); - - -sub blacklist -{ - ($name)=@_; - - foreach(@blist) { - - if ( $name =~ m/$_/sg) { print "WARNING - $name is blacklisted\n"; return 1;} - } - - return 0; - -} - - -sub getpage -{ - #arguments - ($page,$host,$auth,$myfile)=@_; - - - #output file - open ( outputfile, ">".$myfile); - - - $port = "http(80)"; - $getmess = "GET " . $page ." HTTP/1.1\n" . $auth; - - print "INFO - sending message - $getmess\n"; - print outputfile "$getmess\n\n"; - - $sock = IO::Socket::INET->new - ( - PeerAddr => $host, PeerPort => $port, Proto => 'tcp', - ) ; - - - print $sock "$getmess\n\n"; - - - while(<$sock>) { - - print outputfile $_; - - } - - close ($sock); - close (outputfile); -} - -sub prntfeatures -{ - - ($release,$package,$features,$myfile,$domain)=@_; - - $release =~ s/\\//sg; - $csvdel ="\",\""; - $invcoma ="\""; - if ($newtdformat) { - $package =~ s/backlog//sgi; - print $myfile "$invcoma $release $csvdel $domain $csvdel $package $csvdel $myfeat $invcoma\n"; - - } else { - - $features = $features."(.*?)\<\/dt(.*?)\
(.*?)\<\/dd\>/sg) { - $mysubfeat = $mysubfeat.$mystr.$1; - $mystr = " & "; - } - undef $mystr; - $mysubfeat =~ s/,/ /sg; - $mysubfeat =~ s/\n//sg; - $mysubfeat =~ s/\<.*?\>//sg; - - - print $myfile "$invcoma $release $csvdel $domain $csvdel $package $csvdel $myfeat $csvdel $csvdel $csvdel $mysubfeat $invcoma\n"; - - $mysubfeat = ""; - } - - } -} - -sub loadfile -{ - - $/ = " "; - #arguments - ($myfile)=@_; - open ( inputfile, "<".$myfile); - my $contents = do { local $/; }; - close(inputfile); - return $contents; - -} - -sub td_roadmap -{ - - - #arguments - ($infile,$outfile,$domain,@releases)=@_; - - - $roadmap=loadfile $infile; - open ( outputfile, ">>".$outfile); - - - if ($newtdformat) { - print "Processing new TD roadmap format\n"; - if ($roadmap =~ m /Contents\<\/h2\>.*?\<\/table/sg) { $roadmap =$';} - foreach (@releases) { - $exp=$_." Roadmap"; - - if ($roadmap =~ m /($exp)/sg) { - print "PASS - Found entry for $_ \n"; - $relroad =$'; - - if ($roadmap =~ m /table\>(.*?)\<\/table/sg) { $relroad =$1;} - $relroad =~ s/\n//sg; - - $skipfirst =1; #skipping the header of the table - while ($relroad =~ m/\(.*?)\<\/tr(.*)/sg){ - $myfeat=$1; - $relroad =$2; - if ($skipfirst) {$skipfirst=0;next;} - $package=""; - if ($myfeat =~ m/title\=\"(.*?)\"/sg) { $package=$1; } #looking for package name - - $myfeat=~ s/\<\/td\>\/\",\"/sg; - $myfeat=~ s/\<.*?\>//sg; - - if ($myfeat =~m/[A-z]/sg ){prntfeatures($_,$package,$myfeat,outputfile,$domain);} - - } - } - } - } else { - - foreach (@releases) { - - $exp="\\.*?\\>".$_; - - if ($roadmap =~ m /($exp)/sg) { - print "PASS - Found entry for $_ \n"; - $relroad =$'; - - if ($relroad =~ m /(.*?)\

.*\>(.*?)\<.*<\/h3/g) { - $package = $1; - $ppos[$i]= pos($relroad); - $pname[$i]= $package; - $i++; - } - for ( $i=0;$i<($#ppos); $i++){ - $features= substr ($relroad, $ppos[$i],$ppos[$i+1]-$ppos[$i]); - prntfeatures($_,$pname[$i],$features,outputfile,$domain); - } - $features= substr ($relroad, $ppos[$i]); - - prntfeatures($_,$pname[$i],$features,outputfile,$domain); - @ppos =""; - @pname =""; - undef ($features); - } - } - - } - - - - close (outputfile); - - -} - - -sub parse_category { - - #arguments - ($infile)=@_; - - my @mylink; - - $mypage=loadfile $infile; - $i=0; - if ( $mypage =~ m/Pages in category(.*)\<\/table/sg) { - print "INFO - Category page found\n"; - $mypage = $1; - - while ($mypage =~ m /\>".$outfile); - open ( soutputfile, ">>"."summary_".$outfile); - - if ($mypkg =~ m/index\.php\/(.*?) HTTP/sg) { - - $pagename = $1; - - if ($splitbklogs) { - $whichtd = bklog_domain($pagename); - open ( tdoutputfile, ">>".$whichtd. "_".$outfile); - } - - - - if (blacklist($pagename)) { - - close (outputfile); - close (soutputfile); - - if ($splitbklogs) { close (tdoutputfile);} - return 0; - } - print "INFO -Processing Package $pagename \n"; - $i=0; - $found_counter =0; - - if ($mypkg =~m/class\=\"bugzilla sortable\"/sg ) { $headerformat="autobug_format"; } - - while ($mypkg =~ m/\(.*?)\<\/tr/sg) { - $myheader= $&; - if ($myheader =~ m/style=\"background-color\:/sg) { - next; - } - $myfeat= $1; - $myfeat =~ s/\<\/td\>/\t/sg; - $myfeat =~ s/\<.*?\>//sg; - $myfeat =~ s/\n//sg; - - if ($myfeat =~ m/IDPStatus/sg) { #header for bugzilla mediawiki plugin - next; - } - - if ($myfeat =~ m/[A-z]/sg and not $myfeat =~ m/\<\;etc/sg and - not $myfeat =~ m/\<\;Feature/sg and not $myfeat =~ m/Item not available/sg) { - print outputfile "$pagename\t$myfeat\n"; - if ($splitbklogs) { print tdoutputfile "$pagename\t$myfeat\n";} - - # print "matching $myfeat with $count_target\n" ; - if ($myfeat =~ m/$count_target/sg) {$found_counter++;} - $i++; - } - - } - - if ($count_target){ - $mycount=$i."\t".$found_counter; - } else { - $mycount=$i; - } - - if ($splitbklogs) { - print soutputfile "$id\t$pagename\t$mycount\t$headerformat\t$whichtd\thttp://developer.symbian.org/wiki/index.php/$pagename\n"; - } else { - print soutputfile "$id\t$pagename\t$mycount\t$headerformat\thttp://developer.symbian.org/wiki/index.php/$pagename\n"; - } - - } - - close (outputfile); - close (soutputfile); - if ($splitbklogs) { close (tdoutputfile);} - -} - - - - -#help print -sub printhelp -{ - - print "\n\n version 1.1 - \ngettd.pl -t=url -d=domain \n\nRequired parameters for Technology Roadmaps:\n\t -t url containing the technology domain roadmap\n\t -d the technology domain name - \n\nOptional Parmeters for Technology Roadmaps\n\t-new if the roadmap has the new wiki format - \n\nRequired Parameters for Package backlogs\n\t-p for package backlog analysis. just run gettd.pl -p - \n\nOptional Pararmeters for Package backlogs\n\t -compare [f1] [f2] compares two package summary files for changes ignores order - \n\t -split splits the content of the backlog output into technology domains. requires package_domains.csv file with mapping details - \n\t -count=regexp counts the times that a package backlog line matches the regexp, the results are output to the summary file - \n\nCommonOptional parameters\n\t-o filename ,the output is logged into the output.csv file by default\n\t-h for help - \n\t recommend to run under cygwin environment and perl version v5.10.0 \n - \n\t pages blacklisted for package backlogs are @blist\n"; - exit; -} - - - -#compare bklogs -sub compare_bklogs { - #arguments - (@bklogs)=@_; - - if (not $#bklogs == 1) { printhelp;} - - - $cmd ="cut -f 2,3 ". $bklogs[0] . " | sort -u > tmp1.txt"; - - system($cmd); - - $cmd ="cut -f 2,3 ". $bklogs[1] . " | sort -u > tmp2.txt"; - system($cmd); - - exec ("diff tmp1.txt tmp2.txt | grep '[<|>]'"); - system("rm temp*.txt"); - - exit; - -} - - - - -#process command line options -sub cmd_options -{ - - my $help; - my @compare; - - - GetOptions('h' => \$help,'t=s'=> \$target_url, 'd=s' => \$tdomain , 'o=s' => \$csvfile, - 'a' => \$authon , 'p' => \$ispackage, 'compare=s{2}' =>\@compare, 'new' => \$isnewformat, - 'split' => \$splitbklogs, 'count=s' => \$count_target); - - if (@compare) { - compare_bklogs @compare; - - } - if ($count_target) { - print "INFO - Seaching for $count_target\n"; - } - - if ($help) { - printhelp; - } - - - if ($ispackage) { - - $tdomain =" "; - $target_url = "http://developer.symbian.org/wiki/index.php/Category:Package_Backlog"; - - } - if ($isnewformat){ - $newtdformat = 1; - - } - - if ( not $target_url) { - - print "ERROR-missing arguments target url\n"; - printhelp; - } - - - if (not $tdomain){ - print "ERROR-missing arguments domain level\n"; - printhelp; - } - - print "\nINFO-downloading $target_url with label $tdomain\n"; - - - if (not $csvfile) { - if (not $ispackage) { - $csvfile="output.csv"; - - } else { - $csvfile="output.txt"; - system ("rm *output.txt"); - - } - } - print "\nINFO-output recorded in $csvfile \n"; - - - -} -#main -$/ = " "; -$host1 = "developer.symbian.org"; - -cmd_options(); - -if ($authon) { - #file containing login details from http cookie - $mycookie = loadfile("mycookie.txt"); - - $auth = "Cookie: " . $mycookie ; -} - - -if ($ispackage) { - getpage($target_url, $host1, $auth, "debug.txt"); - @bklog = parse_category("debug.txt"); - $j=0; - - foreach (@bklog) { - getpage("http://".$host1.$_, $host1, $auth, "pkg".$j.".txt"); - parse_bklog ("pkg".$j.".txt",$csvfile, $j); - $j++; - - - - } - -} else { - - #foundation releases - add as required - @releases=("Symbian\\^2","Symbian\\^3","Symbian\\^4"); - - getpage($target_url, $host1, $auth, "debug.txt"); - td_roadmap("debug.txt" , $csvfile, $tdomain ,@releases); -} \ No newline at end of file