tdroadmap_merger/gettd.pl
changeset 23 dbe87093a3ca
child 24 8fb98cf0b1ac
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tdroadmap_merger/gettd.pl	Fri Jul 31 11:01:24 2009 +0100
@@ -0,0 +1,217 @@
+#!/usr/bin/perl
+
+ 
+
+use IO::Socket; 
+use Getopt::Long;
+
+
+my $target_url;	# -t: roadmap page to request (handed to getpage)
+my $tdomain;	# -d: technology domain label written into each CSV row
+my $csvfile;	# -o: CSV output file; cmd_options defaults it to output.csv
+
+# Fetch $page from $host over plain HTTP and save the exchange in $myfile.
+# Arguments:
+#   $page   - request target placed on the GET line
+#   $host   - server to connect to (port 80)
+#   $auth   - one complete extra header line, e.g. "Cookie: ..."
+#   $myfile - file that receives the request text followed by the raw
+#             response exactly as read from the socket
+# Dies if the output file cannot be opened/closed or the connection fails.
+sub getpage
+{
+	my ($page, $host, $auth, $myfile) = @_;
+
+	open(my $out, '>', $myfile) or die "getpage: cannot write $myfile: $!\n";
+
+	# HTTP/1.1 requires a Host header; "Connection: close" makes the server
+	# end the stream after the response so the read loop below terminates.
+	my $getmess = "GET $page HTTP/1.1\r\n"
+		. "Host: $host\r\n"
+		. "Connection: close\r\n"
+		. $auth;
+
+	print "sending message - $getmess\n";
+	print {$out} "$getmess\n\n";
+
+	my $sock = IO::Socket::INET->new(
+		PeerAddr => $host, PeerPort => 'http(80)', Proto => 'tcp',
+	) or die "getpage: cannot connect to $host: $@\n";
+
+	print {$sock} "$getmess\r\n\r\n";
+	print {$out} $_ while <$sock>;
+
+	close($sock);
+	close($out) or die "getpage: error closing $myfile: $!\n";
+}
+
+# Write one CSV row per feature found in an HTML definition-list fragment.
+#
+# Arguments:
+#   $release  - release label; backslashes (regex escapes) are stripped
+#   $package  - package name for the rows
+#   $features - HTML fragment holding <dt>feature</dt><dd>detail</dd>... items
+#   $myfile   - output filehandle (a handle reference or a bareword name)
+#   $domain   - technology domain label for the rows
+#
+# Row format: " release, domain, package, feature, details\n" where details
+# is every <dd> of that feature joined with " & ", with commas turned into
+# spaces and newlines/tags removed.
+sub prntfeatures
+{
+	# Lexical copies: the caller's $features/$package must not be clobbered.
+	my ($release, $package, $features, $myfile, $domain) = @_;
+
+	$release =~ s/\\//g;
+	$features .= "<dt";	# sentinel so the last <dt> item is terminated too
+
+	while ($features =~ /dt\>(.*?)\<\/dt(.*?)\<dt/sg) {
+		my ($myfeat, $subfeat) = ($1, $2);
+
+		# Back up over the "dt" just consumed: it opens the next item.
+		pos($features) = pos($features) - 2;
+
+		$myfeat =~ s/\n/ /g;
+		my $mysubfeat = join(" & ", $subfeat =~ /\<dd\>(.*?)\<\/dd\>/sg);
+		$mysubfeat =~ s/,/ /g;
+		$mysubfeat =~ s/\n//g;
+		$mysubfeat =~ s/\<.*?\>//sg;
+
+		print {$myfile} " $release, $domain, $package, $myfeat, $mysubfeat\n";
+	}
+}
+	
+# Return the whole content of file $myfile as one string.  Warns and returns
+# nothing (undef in scalar context) when the file cannot be opened.  The input
+# record separator is changed only locally, so the caller's $/ is untouched.
+sub loadfile
+{
+	my ($myfile) = @_;
+	open(my $in, '<', $myfile)
+		or do { warn "loadfile: cannot read $myfile: $!\n"; return };
+	my $contents = do { local $/; <$in> };
+	close($in);
+	return $contents;
+}
+
+# Extract the roadmap of each release from a saved HTML page into CSV rows.
+#
+# Arguments:
+#   $infile   - file holding the downloaded roadmap page
+#   $outfile  - CSV file; rows are appended
+#   $domain   - technology domain label for every row
+#   @releases - release names, already escaped for use inside a regex
+#
+# A release section starts at the <h2> heading naming the release and ends
+# at the next <h2>.  Inside it, every <h3> heading names a package and the
+# text up to the next <h3> is handed to prntfeatures as its feature list.
+sub td_roadmap
+{
+	my ($infile, $outfile, $domain, @releases) = @_;
+
+	my $roadmap = loadfile($infile);
+	$roadmap = "" unless defined $roadmap;
+	open(my $out, '>>', $outfile)
+		or die "td_roadmap: cannot append to $outfile: $!\n";
+
+	foreach my $release (@releases) {
+		# No /g here: every release is searched from the top of the page, so
+		# the order of @releases need not follow the order on the page.
+		next unless $roadmap =~ m/\<h2\>.*?\>$release/s;
+		print "Found entry for $release \n";
+
+		# Keep only the text between this heading and the next <h2>.
+		my $relroad = substr($roadmap, $+[0]);
+		$relroad = $1 if $relroad =~ m/(.*?)\<h2/s;
+
+		# Record each package name and the offset just past its heading.
+		my (@pname, @ppos);
+		while ($relroad =~ m/\<h3\>.*\>(.*?)\<.*<\/h3/g) {
+			push @pname, $1;
+			push @ppos, pos($relroad);
+		}
+
+		# Section without any <h3>: treat it as one unnamed package.
+		if (!@ppos) { @pname = (""); @ppos = (0); }
+
+		# A package's features run up to the next heading or the section end.
+		foreach my $i (0 .. $#ppos) {
+			my $end = $i < $#ppos ? $ppos[$i + 1] : length($relroad);
+			my $features = substr($relroad, $ppos[$i], $end - $ppos[$i]);
+			prntfeatures($release, $pname[$i], $features, $out, $domain);
+		}
+	}
+
+	close($out) or die "td_roadmap: error closing $outfile: $!\n";
+}
+
+
+# Show the version and usage text on standard output, then leave the program.
+sub printhelp
+{
+	my $head = "\n\n version 0.2 \n\t\ngettd.pl -t=url -d=domain \n";
+	my $required = "required parameters:\n\t -t url containing the technology domain roadmap\n\t -d the technology domain name\n\t";
+	my $optional = "\n Optional parameters\n\t-o filename ,the output is logged into the output.csv file by default\n\t-h for help";
+	print join("", $head, $required, $optional);
+	exit;
+}
+
+
+# Parse the command line into the file-level option variables.
+#   -t url     (required) stored in $target_url
+#   -d domain  (required) stored in $tdomain
+#   -o file    (optional) stored in $csvfile, default "output.csv"
+#   -h         print the help text and exit
+# An unknown or malformed option, or a missing required one, prints an error
+# followed by the help text; printhelp ends the program in those cases.
+sub cmd_options
+{
+	my $help;
+
+	# GetOptions returns false after reporting a bad option on STDERR;
+	# carrying on with half-parsed options would only hide the mistake.
+	my $parsed = GetOptions(
+		'h'   => \$help,
+		't=s' => \$target_url,
+		'd=s' => \$tdomain,
+		'o=s' => \$csvfile,
+	);
+	if (not $parsed) {
+		print "ERROR-invalid command line options\n";
+		printhelp();
+	}
+
+	printhelp() if $help;
+
+	if (not $target_url) {
+		print "ERROR-missing arguments target url\n";
+		printhelp();
+	}
+	if (not $tdomain) {
+		print "ERROR-missing arguments domain level\n";
+		printhelp();
+	}
+	print "\nINFO-downloading $target_url with label $tdomain\n";
+	$csvfile = "output.csv" if not $csvfile;
+	print "\nINFO-output recorded in $csvfile \n";
+}
+#main
+
+# Validate the command line first, so -h and usage errors are reported
+# before any file or network access happens.
+cmd_options();
+
+# Login details copied from a browser HTTP cookie; sent as the Cookie header.
+my $mycookie = loadfile("mycookie.txt");
+#$auth ="Authorization: Basic Zm91bmRhdGlvbjp0ZXN0MA==";
+my $auth = "Cookie: " . $mycookie;
+
+# Foundation releases - add as required.  The backslash escapes "^" because
+# td_roadmap interpolates these names into a regular expression.
+my @releases = ("Symbian\\^2", "Symbian\\^3", "Symbian\\^4");
+
+my $host1 = "developer.symbian.org";
+
+# Download the page into debug.txt, then turn it into CSV rows.
+getpage($target_url, $host1, $auth, "debug.txt");
+td_roadmap("debug.txt", $csvfile, $tdomain, @releases);