scripts/roadmap/gettd.pl
author victorp@symbian.org
Thu, 28 Jan 2010 16:26:30 +0000
changeset 29 13a99a13ca90
permissions -rwxr-xr-x
added commit count script for appache logs

#!/usr/bin/perl

 

use IO::Socket; 
use Getopt::Long;


my $target_url; #target url for the roadmap
my $tdomain; #tag for the domain to be use in csv file
my $csvfile; #output csv file name
my $authon= '';	 #does it require authorisation? default is false


my $count_target; #this value is used to pass a string to match and count on each package backlog
my $ispackage;
my $splitbklogs;
my $summaryheader="ID\tPackage\tFeatures\tFormat\tHttp\n" ;
my $newtdformat = 0;
my @blist = ("Tracking_Package_features","Kernel_EPLization_Backlog",
    "Package_Backlog_Q","Wishlist","Test_Package","Backlog_Details");


sub blacklist
{
 ($name)=@_;
      
 foreach(@blist) {
 
 if ( $name =~ m/$_/sg) { print "WARNING - $name is blacklisted\n"; return 1;}
 }
 
 return 0;

}


sub getpage
{
	#arguments
	($page,$host,$auth,$myfile)=@_;
	
	
	#output file
	open ( outputfile, ">".$myfile);
	
	
	$port = "http(80)";
	$getmess = "GET " . $page ." HTTP/1.1\n" . $auth;

	print "INFO - sending message - $getmess\n";
	print outputfile "$getmess\n\n";

	$sock = IO::Socket::INET->new 	
		(
		 PeerAddr => $host,   PeerPort => $port,  Proto => 'tcp', 
		) ;

 
	print $sock "$getmess\n\n";

 
	while(<$sock>) {
 
	  print outputfile $_;
 
	}	
  	
	close ($sock);
	close (outputfile);
}

sub prntfeatures 
{

	($release,$package,$features,$myfile,$domain)=@_;
	
  $release =~ s/\\//sg;	
	$csvdel ="\",\"";
	$invcoma ="\"";
 if ($newtdformat) {
  $package =~ s/backlog//sgi;
  print $myfile "$invcoma $release $csvdel $domain $csvdel $package $csvdel $myfeat $invcoma\n";
  
 } else {
		
	$features = $features."<dt";

	

	while ( $features =~ /dt\>(.*?)\<\/dt(.*?)\<dt/sg  ){
		$myfeat = $1;
		$subfeat =$2;
		
		$myfeat =~ s/\n/ /sg;
		
		pos($features) = pos($features) -2;
		
		$mystr="";
		while ( $subfeat =~ /\<dd\>(.*?)\<\/dd\>/sg) {
			$mysubfeat = $mysubfeat.$mystr.$1;
			$mystr = " & ";
		}
		undef $mystr;
	$mysubfeat =~ s/,/ /sg;
	$mysubfeat =~ s/\n//sg;
	$mysubfeat =~ s/\<.*?\>//sg;
	

	print $myfile "$invcoma $release $csvdel $domain $csvdel $package $csvdel $myfeat $csvdel $csvdel $csvdel $mysubfeat $invcoma\n";
	
	$mysubfeat = "";	
	}
		
 }
}
	
sub loadfile
{

	$/ = " ";
	#arguments
	($myfile)=@_;
	open ( inputfile, "<".$myfile);
	my $contents = do { local $/;  <inputfile> };
	close(inputfile);
	return $contents;

}

sub td_roadmap
{


	#arguments
	($infile,$outfile,$domain,@releases)=@_;

	
	$roadmap=loadfile $infile;
	open ( outputfile, ">>".$outfile);


  if ($newtdformat) {
       print "Processing new TD roadmap format\n";
         if ($roadmap =~ m /Contents\<\/h2\>.*?\<\/table/sg) { $roadmap =$';}
         foreach (@releases) {
          $exp=$_." Roadmap";
		         
           if ($roadmap =~ m /($exp)/sg) { 
			     print "PASS - Found entry for $_ \n";
			     $relroad =$';	
			
			     if ($roadmap =~ m /table\>(.*?)\<\/table/sg) { $relroad =$1;}
			      $relroad =~ s/\n//sg;
			      
            $skipfirst =1; #skipping the header of the table         
            while ($relroad =~ m/\<tr\>(.*?)\<\/tr(.*)/sg){
                  $myfeat=$1;
                  $relroad =$2;
                  if ($skipfirst) {$skipfirst=0;next;}
                  $package="";
                  if  ($myfeat =~ m/title\=\"(.*?)\"/sg) {  $package=$1; } #looking for package name
                                  
                  $myfeat=~ s/\<\/td\>\<td\>/\",\"/sg;   
                  $myfeat=~ s/\<.*?\>//sg;
                 
                 if ($myfeat =~m/[A-z]/sg ){prntfeatures($_,$package,$myfeat,outputfile,$domain);}
                
                 }  		     
         }
        }
  } else {

	 foreach (@releases) {
		
	 	$exp="\\<h2\\>.*?\\>".$_;
		  
		if ($roadmap =~ m /($exp)/sg) { 
			print "PASS - Found entry for $_ \n";
			$relroad =$';	
			
			if ($relroad =~ m /(.*?)\<h2/sg) { $relroad =$1;}
			$i=0;	
			while ($relroad=~ m/\<h3\>.*\>(.*?)\<.*<\/h3/g) {
				$package = $1;		
				$ppos[$i]= pos($relroad);
				$pname[$i]= $package;
				$i++;
			}
			for ( $i=0;$i<($#ppos); $i++){
				$features= substr ($relroad, $ppos[$i],$ppos[$i+1]-$ppos[$i]);
				prntfeatures($_,$pname[$i],$features,outputfile,$domain);
			}
			$features= substr ($relroad, $ppos[$i]);
		
			prntfeatures($_,$pname[$i],$features,outputfile,$domain);
			@ppos ="";
			@pname ="";
			undef ($features);
		}
	}		 	

	}
	
	

	close (outputfile);


}


sub parse_category {

	#arguments
	($infile)=@_;

	my @mylink;

	$mypage=loadfile $infile;
	$i=0;	
	if ( $mypage =~ m/Pages in category(.*)\<\/table/sg) {
		print "INFO - Category page found\n";
		$mypage = $1;
		
		while ($mypage =~ m /\<a href\=\"(\/wiki\/index\.php\/.*?)\"/g) {
			
			$mylink[$i] = $1;	
			$i++;
			
		}
	print "INFO - Found $i items in the category page\n"
	}
	return @mylink;
}

sub bklog_domain {
 #argument
($mytechdomian)=@_;

  $mytechdomian =~s/\)//sg;
  $mytechdomian =~s/\(//sg;
  
 $domaininfo=loadfile ("package_domains.csv");
 if ($domaininfo =~ m/$mytechdomian.*?\{(.*?)\}/sgi ) {
      return $1;
 }
  print "ERROR - domain not found for $mytechdomian\n";
  return "orphan";
 
}

sub parse_bklog {
	
	#arguments
	($infile,$outfile,$id)=@_;
	$mypkg=loadfile $infile;
	#list if the bklog has been ported to the new bugzilla based format
  $headerformat= "wiki_format";
  
	
	open ( outputfile, ">>".$outfile);
	open ( soutputfile, ">>"."summary_".$outfile);
	
	if ($mypkg =~ m/index\.php\/(.*?) HTTP/sg) {
  
		$pagename = $1;
		
    if ($splitbklogs) {
      $whichtd =  bklog_domain($pagename);
      open ( tdoutputfile, ">>".$whichtd. "_".$outfile);
     }
    
    
    
    if (blacklist($pagename)) {
       
    	  close (outputfile);
	      close (soutputfile);
	    
	      if ($splitbklogs) { close (tdoutputfile);}
        return 0;
    }
		print "INFO -Processing Package $pagename \n";
		$i=0;
		$found_counter =0;
		
		if ($mypkg =~m/class\=\"bugzilla sortable\"/sg ) { $headerformat="autobug_format"; }
		
		while ($mypkg =~ m/\<tr.*?\>(.*?)\<\/tr/sg) { 
			$myheader= $&;
      if ($myheader =~ m/style=\"background-color\:/sg) {
        next;
      }
			$myfeat= $1;
			$myfeat =~ s/\<\/td\>/\t/sg;
			$myfeat =~ s/\<.*?\>//sg;
			$myfeat =~ s/\n//sg;
			
			if ($myfeat =~ m/IDPStatus/sg) { #header for bugzilla mediawiki plugin
        next;
      }
			
			if ($myfeat =~ m/[A-z]/sg and not $myfeat =~ m/\&lt\;etc/sg and 
			not $myfeat =~ m/\&lt\;Feature/sg and not $myfeat =~ m/Item not available/sg) {
				print outputfile "$pagename\t$myfeat\n";
				if ($splitbklogs) { print tdoutputfile "$pagename\t$myfeat\n";}
        
      #  print "matching $myfeat with $count_target\n"    ;
        if ($myfeat =~ m/$count_target/sg) {$found_counter++;}
        $i++;
			}
			
		}
  
  if ($count_target){
    $mycount=$i."\t".$found_counter;
  } else {
    $mycount=$i;
  }
	
	if ($splitbklogs) { 
    print soutputfile "$id\t$pagename\t$mycount\t$headerformat\t$whichtd\thttp://developer.symbian.org/wiki/index.php/$pagename\n";
	} else {
    print soutputfile "$id\t$pagename\t$mycount\t$headerformat\thttp://developer.symbian.org/wiki/index.php/$pagename\n";
  }

	}

	close (outputfile);
	close (soutputfile);
  if ($splitbklogs) { close (tdoutputfile);}
                           
}




#help print
sub printhelp
{
       
	print "\n\n version 1.1
	\ngettd.pl -t=url -d=domain \n\nRequired parameters for Technology Roadmaps:\n\t -t url containing the technology domain roadmap\n\t -d the technology domain name
	\n\nOptional Parmeters for Technology Roadmaps\n\t-new if the roadmap has the new wiki format
  \n\nRequired Parameters for Package backlogs\n\t-p for package backlog analysis. just run gettd.pl -p
  \n\nOptional Pararmeters for Package backlogs\n\t -compare [f1] [f2] compares two package summary files for changes ignores order
  \n\t -split splits the content of the backlog output into technology domains. requires package_domains.csv file with mapping details
  \n\t -count=regexp counts the times that a package backlog line matches the regexp, the results are output to the summary file
  \n\nCommonOptional parameters\n\t-o filename ,the output is logged into the output.csv file by default\n\t-h for help
	\n\t recommend to run under cygwin environment and perl version v5.10.0 \n
  \n\t pages blacklisted for package backlogs are @blist\n";
	exit;
}



#compare bklogs
sub compare_bklogs {
	#arguments
	(@bklogs)=@_;
	
	if (not $#bklogs == 1) { printhelp;}

	
	$cmd ="cut -f 2,3 ". $bklogs[0] . " | sort -u > tmp1.txt";
	
	system($cmd);
	
	$cmd ="cut -f 2,3 ". $bklogs[1] . " | sort -u > tmp2.txt";
	system($cmd);
	
	exec ("diff tmp1.txt tmp2.txt | grep '[<|>]'");
	system("rm temp*.txt");
	
	exit;

}




#process command line options
sub cmd_options
{

  my $help;
  my @compare;


  GetOptions('h' => \$help,'t=s'=> \$target_url, 'd=s' => \$tdomain , 'o=s' => \$csvfile, 
	'a' => \$authon , 'p' => \$ispackage, 'compare=s{2}' =>\@compare, 'new' => \$isnewformat,
   'split' => \$splitbklogs, 'count=s' => \$count_target);

  if (@compare) {
	compare_bklogs @compare;
	
  }
  if ($count_target) {
    print "INFO - Seaching for $count_target\n";
  }

  if ($help) {
    printhelp;
  }


 if ($ispackage) {

 	$tdomain =" ";
	$target_url = "http://developer.symbian.org/wiki/index.php/Category:Package_Backlog";
	
 }  
 if ($isnewformat){
    $newtdformat = 1;
 
 }

 if ( not $target_url) {

	print "ERROR-missing arguments target url\n";
	printhelp;	
  } 


 if (not $tdomain){
	print "ERROR-missing arguments domain level\n";
	printhelp;
 }

 	print "\nINFO-downloading $target_url with label $tdomain\n";
  

 if (not $csvfile) {
	if (not $ispackage) { 
		$csvfile="output.csv";
		
	} else {
		$csvfile="output.txt";
		system ("rm *output.txt");
	
	}
 }
 print "\nINFO-output recorded in $csvfile \n";

                                      

}
#main
$/ = " ";
$host1 = "developer.symbian.org";

cmd_options();

if ($authon) {
	#file containing login details from http cookie
	$mycookie = loadfile("mycookie.txt");

	$auth = "Cookie: " . $mycookie ;
}


if ($ispackage) {
	getpage($target_url, $host1, $auth, "debug.txt");
	@bklog = parse_category("debug.txt");
	$j=0;
	
	foreach (@bklog) {
		getpage("http://".$host1.$_, $host1, $auth, "pkg".$j.".txt");
		parse_bklog ("pkg".$j.".txt",$csvfile, $j);
		$j++;
		
	

	}

} else {

	#foundation releases - add as required
	@releases=("Symbian\\^2","Symbian\\^3","Symbian\\^4");

	getpage($target_url, $host1, $auth, "debug.txt");
	td_roadmap("debug.txt" , $csvfile, $tdomain ,@releases);
}