tdroadmap_merger/gettd.pl
changeset 22 dbe87093a3ca
child 23 8fb98cf0b1ac
equal deleted inserted replaced
21:f55ca49f7f44 22:dbe87093a3ca
       
     1 #!/usr/bin/perl
       
     2 
       
     3  
       
     4 
       
     5 use IO::Socket; 
       
     6 use Getopt::Long;
       
     7 
       
     8 
       
     9 my $target_url;
       
    10 my $tdomain;
       
    11 my $csvfile;
       
    12 
       
    # getpage($page, $host, $auth, $myfile)
    #
    # Sends a hand-built HTTP GET request for $page to $host and stores the
    # exchange in the file $myfile.
    #
    #   $page   - request target placed after "GET "
    #   $host   - peer address to connect to (service http, port 80)
    #   $auth   - header text appended after the request line
    #   $myfile - file to (over)write; it receives the request text first,
    #             followed by everything the server sends back
    #
    # Dies with an ERROR- message when the output file cannot be opened or
    # written, or when the connection cannot be established.
    sub getpage
    {
        my ($page, $host, $auth, $myfile) = @_;

        open(my $out, '>', $myfile)
            or die "ERROR-cannot open $myfile for writing: $!\n";

        my $port    = "http(80)";
        # NOTE(review): the request uses bare LF line ends and carries no Host
        # header. It is left byte-for-byte as it was because the content of
        # $auth (supplied by the caller) decides where the header block ends.
        my $getmess = "GET " . $page . " HTTP/1.1\n" . $auth;

        print "sending message - $getmess\n";
        print {$out} "$getmess\n\n";

        my $sock = IO::Socket::INET->new(
            PeerAddr => $host,
            PeerPort => $port,
            Proto    => 'tcp',
        ) or die "ERROR-cannot connect to $host ($port): $@\n";

        print {$sock} "$getmess\n\n";

        # Plain copy of the response; the chunk size depends on the current
        # $/ but the bytes written are the same whatever it is set to.
        while (my $chunk = <$sock>) {
            print {$out} $chunk;
        }

        close($sock);
        close($out)
            or die "ERROR-cannot finish writing $myfile: $!\n";
    }
       
    47 
       
    # prntfeatures($release, $package, $features, $myfile, $domain)
    #
    # Writes one CSV row per <dt>...</dt> entry found in the HTML fragment
    # $features. Each row has the form
    #
    #    " release, domain, package, feature, subfeatures\n"
    #
    #   $release  - release label; backslashes are removed before printing
    #   $package  - package name copied into every row
    #   $features - HTML fragment holding <dt> titles and <dd> details
    #   $myfile   - output handle: a lexical handle, or the name of a
    #               package filehandle as the original callers pass it
    #   $domain   - technology domain copied into every row
    #
    # The <dd> texts that follow a <dt> are joined with " & ". Commas are
    # replaced by spaces in both the feature title and the sub-feature text
    # so that a row always has exactly five columns.
    sub prntfeatures
    {
        my ($release, $package, $features, $myfile, $domain) = @_;

        # The release label is a regex-escaped string (e.g. Symbian\^2).
        $release =~ s/\\//sg;

        # Sentinel: lets the last <dt> entry satisfy the "<dt" look-ahead.
        $features .= "<dt";

        # The look-ahead leaves the next "<dt" unconsumed so the following
        # iteration can start on it.
        while ($features =~ /dt\>(.*?)\<\/dt(.*?)(?=\<dt)/sg) {
            my ($myfeat, $subfeat) = ($1, $2);

            $myfeat =~ s/\n/ /sg;
            # A comma in the title would otherwise add a CSV column.
            $myfeat =~ s/,/ /sg;

            my @details   = $subfeat =~ /\<dd\>(.*?)\<\/dd\>/sg;
            my $mysubfeat = join(" & ", @details);
            $mysubfeat =~ s/,/ /sg;
            $mysubfeat =~ s/\n//sg;
            $mysubfeat =~ s/\<.*?\>//sg;

            print {$myfile} " $release, $domain, $package, $myfeat, $mysubfeat\n";
        }
    }
       
    83 	
       
    # loadfile($myfile)
    #
    # Returns the whole content of the file $myfile as a single string.
    # When the file cannot be opened a warning is printed and undef is
    # returned.
    #
    # The input record separator is only overridden locally for the slurp;
    # the caller's $/ is left untouched.
    sub loadfile
    {
        my ($myfile) = @_;

        my $contents;
        if (open(my $in, '<', $myfile)) {
            local $/;              # slurp mode, restored when the block ends
            $contents = <$in>;
            close($in);
        }
        else {
            warn "WARNING-cannot read $myfile: $!\n";
        }
        return $contents;
    }
       
    96 
       
    # td_roadmap($infile, $outfile, $domain, @releases)
    #
    # Extracts the per-release feature lists from the HTML stored in $infile
    # and appends them as CSV rows to $outfile.
    #
    #   $infile   - file holding the downloaded roadmap page
    #   $outfile  - CSV file, opened in append mode
    #   $domain   - technology domain label copied into every row
    #   @releases - release names, written as regex fragments
    #
    # For every release whose <h2> heading is found, the text up to the next
    # "<h2" is split at its <h3> headings; the text after each heading is
    # handed to prntfeatures together with the name taken from the heading.
    # Dies with an ERROR- message when $outfile cannot be opened or written.
    sub td_roadmap
    {
        my ($infile, $outfile, $domain, @releases) = @_;

        my $roadmap = loadfile($infile);
        $roadmap = "" unless defined $roadmap;

        open(my $out, '>>', $outfile)
            or die "ERROR-cannot open $outfile for appending: $!\n";

        foreach my $release (@releases) {
            my $exp = "\\<h2\\>.*?\\>" . $release;

            # Scalar-context //g is kept on purpose: after a hit the search
            # for the next release resumes behind this heading; after a miss
            # it restarts from the top of the page.
            next unless $roadmap =~ m/($exp)/sg;
            print "Found entry for $release \n";

            # Everything after the heading, cut at the next <h2 if any.
            my $relroad = substr($roadmap, pos($roadmap));
            if ($relroad =~ m/(.*?)\<h2/s) {
                $relroad = $1;
            }

            # Name of every <h3> heading and the offset just behind it.
            my (@ppos, @pname);
            while ($relroad =~ m/\<h3\>.*\>(.*?)\<.*<\/h3/g) {
                push @pname, $1;
                push @ppos,  pos($relroad);
            }

            # Without any <h3> heading the whole section is still scanned
            # once, with an empty package name.
            if (!@ppos) {
                prntfeatures($release, "", $relroad, $out, $domain);
                next;
            }

            for my $i (0 .. $#ppos) {
                # A segment runs up to the end of the next heading; the last
                # one runs to the end of the section.
                my $features = $i < $#ppos
                    ? substr($relroad, $ppos[$i], $ppos[$i + 1] - $ppos[$i])
                    : substr($relroad, $ppos[$i]);
                prntfeatures($release, $pname[$i], $features, $out, $domain);
            }
        }

        close($out)
            or die "ERROR-cannot finish writing $outfile: $!\n";
    }
       
   147 
       
   148 
       
   # printhelp()
   #
   # Prints the version and usage text on standard output and ends the
   # program.
   sub printhelp
   {
       my @usage = (
           "\n\n version 0.2 \n",
           "\t\ngettd.pl -t=url -d=domain \n",
           "required parameters:\n",
           "\t -t url containing the technology domain roadmap\n",
           "\t -d the technology domain name\n",
           "\t\n Optional parameters\n",
           "\t-o filename ,the output is logged into the output.csv file by default\n",
           "\t-h for help",
       );

       print join("", @usage);
       exit;
   }
       
   158 
       
   159 
       
   # cmd_options()
   #
   # Processes the command line options and stores them in the file-level
   # variables $target_url (-t), $tdomain (-d) and $csvfile (-o).
   #
   # The usage text is printed and the program ends (via printhelp) when
   #   - the command line cannot be parsed,
   #   - -h is given,
   #   - -t or -d is missing.
   # $csvfile falls back to "output.csv" when -o is not given.
   sub cmd_options
   {
       my $help;

       my $parsed_ok = GetOptions(
           'h'   => \$help,
           't=s' => \$target_url,
           'd=s' => \$tdomain,
           'o=s' => \$csvfile,
       );

       # GetOptions has already reported the offending option; do not carry
       # on with a half-parsed command line.
       if (not $parsed_ok) {
           print "ERROR-invalid command line options\n";
           printhelp();
       }

       if ($help) {
           printhelp();
       }

       if (not $target_url) {
           print "ERROR-missing arguments target url\n";
           printhelp();
       }

       if (not $tdomain) {
           print "ERROR-missing arguments domain level\n";
           printhelp();
       }

       print "\nINFO-downloading $target_url with label $tdomain\n";

       if (not $csvfile) {
           $csvfile = "output.csv";
       }
       print "\nINFO-output recorded in $csvfile \n";
   }
       
   #main
   # The input record separator is a single space for the rest of the run.
   # loadfile slurps with its own local override, so this only changes how
   # the socket read in getpage is chunked.
   $/ = " ";

   # Files used by the run: the cookie text with the login details, and the
   # scratch file that holds the downloaded page.
   my $cookie_file = "mycookie.txt";
   my $debug_file  = "debug.txt";

   # Authentication header sent with the request.
   #$auth ="Authorization: Basic Zm91bmRhdGlvbjp0ZXN0MA==";
   my $auth = "Cookie: " . loadfile($cookie_file);

   # Foundation releases to look for - add as required. The names are used
   # as regex fragments, hence the escaped caret.
   my @releases = ("Symbian\\^2", "Symbian\\^3", "Symbian\\^4");

   my $host1 = "developer.symbian.org";

   cmd_options();

   # Download the roadmap page, then turn it into CSV rows.
   getpage($target_url, $host1, $auth, $debug_file);
   td_roadmap($debug_file, $csvfile, $tdomain, @releases);