tdroadmap_merger/gettd.pl
author victorp@symbian.org
Fri, 31 Jul 2009 11:01:24 +0100
changeset 23 dbe87093a3ca
child 24 8fb98cf0b1ac
permissions -rw-r--r--
adding roadmap_merger script - first version

#!/usr/bin/perl

 

use IO::Socket; 
use Getopt::Long;


my $target_url;
my $tdomain;
my $csvfile;

# getpage($page, $host, $auth, $myfile)
#
# Sends a bare HTTP GET for $page to $host and stores everything the server
# sends back (status line, headers and body) in $myfile. The request text is
# also echoed to STDOUT and written at the top of $myfile.
#
#   $page   - request target placed after "GET "
#   $host   - server name to connect to
#   $auth   - one ready-made header line (e.g. "Cookie: ...") appended
#             verbatim after the request line
#   $myfile - path of the output file; it is truncated on every call
#
# Dies when the output file cannot be opened or closed, or when the TCP
# connection cannot be established.
sub getpage
{
	my ($page, $host, $auth, $myfile) = @_;

	#output file
	open(my $out, '>', $myfile)
		or die "ERROR-cannot open $myfile for writing: $!\n";

	# "service(port)" form: look up the "http" service and fall back to
	# port 80 when the service name is not known locally.
	my $port = "http(80)";

	# NOTE(review): the request carries no Host header and uses bare LF line
	# endings; it is left byte-identical here - confirm the server accepts it.
	my $getmess = "GET " . $page . " HTTP/1.1\n" . $auth;

	print "sending message - $getmess\n";
	print {$out} "$getmess\n\n";

	my $sock = IO::Socket::INET->new(
		PeerAddr => $host,
		PeerPort => $port,
		Proto    => 'tcp',
	) or die "ERROR-cannot connect to $host: $@\n";

	print {$sock} "$getmess\n\n";

	# Copy the reply through unchanged until the server closes the connection.
	while (my $chunk = <$sock>) {
		print {$out} $chunk;
	}

	close($sock);
	close($out)
		or die "ERROR-cannot finish writing $myfile: $!\n";
	return;
}

# prntfeatures($release, $package, $features, $myfile, $domain)
#
# Extracts every <dt>feature</dt> entry, together with the <dd> items that
# follow it, from the HTML fragment $features and prints one CSV row per
# feature to the filehandle $myfile:
#
#   " release, domain, package, feature, subfeature & subfeature ...\n"
#
#   $release  - release label; backslashes are removed before printing
#   $package  - package name for the third column
#   $features - HTML fragment holding the <dt>/<dd> entries
#   $myfile   - output filehandle (a handle reference or a handle name)
#   $domain   - technology domain for the second column
#
# Commas inside the feature title and the sub-feature text are replaced by
# spaces so they cannot be mistaken for column separators.
sub prntfeatures
{
	my ($release, $package, $features, $myfile, $domain) = @_;

	# Sentinel: the pattern below needs a following "<dt" to close an entry,
	# so append one to let the last real entry match as well.
	$features = $features . "<dt";

	# Drop backslashes from the release label before it is printed.
	$release =~ s/\\//sg;

	while ($features =~ /dt\>(.*?)\<\/dt(.*?)\<dt/sg) {
		my $myfeat  = $1;
		my $subfeat = $2;

		# The match consumed the "<dt" that opens the next entry; step back
		# over "dt" so the next iteration can start matching there.
		pos($features) = pos($features) - 2;

		$myfeat =~ s/\n/ /sg;
		$myfeat =~ s/,/ /sg;    # keep the title inside a single CSV column

		# Collect all <dd> bodies of this entry and join them with " & ".
		my @parts;
		while ($subfeat =~ /\<dd\>(.*?)\<\/dd\>/sg) {
			push @parts, $1;
		}
		my $mysubfeat = join(" & ", @parts);

		$mysubfeat =~ s/,/ /sg;
		$mysubfeat =~ s/\n//sg;
		$mysubfeat =~ s/\<.*?\>//sg;    # strip any remaining HTML tags

		print {$myfile} " $release, $domain, $package, $myfeat, $mysubfeat\n";
	}

	return;
}
	
# loadfile($myfile)
#
# Returns the complete contents of the file $myfile as one string.
# When the file cannot be opened a warning is printed to STDERR and undef
# is returned, so callers that treat the file as optional keep working.
# The caller's input record separator ($/) is left untouched.
sub loadfile
{
	#arguments
	my ($myfile) = @_;

	my $contents;
	if (open(my $in, '<', $myfile)) {
		# Slurp mode, limited to this one read.
		$contents = do { local $/; <$in> };
		close($in);
	}
	else {
		warn "WARNING-cannot read $myfile: $!\n";
	}
	return $contents;
}

# td_roadmap($infile, $outfile, $domain, @releases)
#
# Reads the saved roadmap page $infile and, for every release in @releases,
# locates the <h2> heading naming that release, splits the section below it
# into packages at each <h3> heading and hands every package's HTML to
# prntfeatures, which appends the CSV rows to $outfile.
#
#   $infile   - file holding the downloaded roadmap page
#   $outfile  - CSV file; rows are appended, never truncated
#   $domain   - technology domain label passed through to every row
#   @releases - release names, already escaped for use inside a regex
#
# Dies when $outfile cannot be opened for appending or closed.
sub td_roadmap
{
	#arguments
	my ($infile, $outfile, $domain, @releases) = @_;

	my $roadmap = loadfile($infile);
	open(my $out, '>>', $outfile)
		or die "ERROR-cannot open $outfile for appending: $!\n";

	foreach my $release (@releases) {

		my $exp = "\\<h2\\>.*?\\>" . $release;

		# /g in scalar context: after a hit the next release is searched
		# from the end of this heading; after a miss the search position
		# falls back to the start of the page.
		next unless $roadmap =~ m/($exp)/sg;
		print "Found entry for $release \n";

		# Everything after the matched heading ...
		my $relroad = substr($roadmap, $+[0]);
		# ... cut off at the next <h2>, if there is one.
		if ($relroad =~ m/(.*?)\<h2/s) { $relroad = $1; }

		# Remember each package name and the offset just past its <h3> heading.
		my (@ppos, @pname);
		while ($relroad =~ m/\<h3\>.*\>(.*?)\<.*<\/h3/g) {
			push @pname, $1;
			push @ppos,  pos($relroad);
		}

		if (!@ppos) {
			# No package headings: report the whole section with an empty
			# package name.
			prntfeatures($release, "", $relroad, $out, $domain);
			next;
		}

		for my $i (0 .. $#ppos) {
			# A package's text runs up to the next package's offset; the
			# last package takes the remainder of the section.
			my $features = $i < $#ppos
				? substr($relroad, $ppos[$i], $ppos[$i + 1] - $ppos[$i])
				: substr($relroad, $ppos[$i]);
			prntfeatures($release, $pname[$i], $features, $out, $domain);
		}
	}

	close($out)
		or die "ERROR-cannot finish writing $outfile: $!\n";
	return;
}


#help print
# Writes the version banner and the list of command line parameters to
# STDOUT, then terminates the script.
sub printhelp
{
	my $usage = "\n\n version 0.2 
	\ngettd.pl -t=url -d=domain \nrequired parameters:\n\t -t url containing the technology domain roadmap\n\t -d the technology domain name
	\n Optional parameters\n\t-o filename ,the output is logged into the output.csv file by default\n\t-h for help";

	print $usage;
	exit;
}


#process command line options
# Parses @ARGV into the file-level settings:
#   -t url      -> $target_url  (required)
#   -d domain   -> $tdomain     (required)
#   -o filename -> $csvfile     (optional, defaults to "output.csv")
#   -h          -> print the help text and exit
# Any malformed option or missing required value prints an error followed by
# the help text; printhelp ends the script.
sub cmd_options
{
	my $help;

	# GetOptions returns false when the command line could not be parsed;
	# do not carry on with half-understood options.
	my $parsed = GetOptions(
		'h'   => \$help,
		't=s' => \$target_url,
		'd=s' => \$tdomain,
		'o=s' => \$csvfile,
	);
	if (not $parsed) {
		print "ERROR-invalid command line options\n";
		printhelp();
	}

	printhelp() if $help;

	if (not $target_url) {
		print "ERROR-missing arguments target url\n";
		printhelp();
	}

	if (not $tdomain) {
		print "ERROR-missing arguments domain level\n";
		printhelp();
	}

	print "\nINFO-downloading $target_url with label $tdomain\n";

	$csvfile = "output.csv" unless $csvfile;
	print "\nINFO-output recorded in $csvfile \n";

	return;
}
#main
# Flow: parse the command line, read the login cookie, download the roadmap
# page into debug.txt, then convert that page into CSV rows.

#foundation releases - add as required
# The caret is backslash-escaped because the names are used inside a regex.
my @releases = ("Symbian\\^2", "Symbian\\^3", "Symbian\\^4");

my $host1 = "developer.symbian.org";

# Parse options first so -h and argument errors are handled before any file
# or network access takes place.
cmd_options();

#file containing login details from http cookie
my $mycookie = loadfile("mycookie.txt");

#$auth ="Authorization: Basic Zm91bmRhdGlvbjp0ZXN0MA==";
my $auth = "Cookie: " . $mycookie;

getpage($target_url, $host1, $auth, "debug.txt");
td_roadmap("debug.txt", $csvfile, $tdomain, @releases);