tdroadmap_merger/gettd.pl
changeset 28 7dccd04e54f9
parent 27 cad9fdd53748
child 32 d1d21a2aeabc
equal deleted inserted replaced
27:cad9fdd53748 28:7dccd04e54f9
    10 my $tdomain; #tag for the domain to be use in csv file
    10 my $tdomain; #tag for the domain to be use in csv file
    11 my $csvfile; #output csv file name
    11 my $csvfile; #output csv file name
    12 my $authon= '';	 #does it require authorisation? default is false
    12 my $authon= '';	 #does it require authorisation? default is false
    13 
    13 
    14 my $ispackage;
    14 my $ispackage;
       
    15 
    15 
    16 
    16 sub getpage
    17 sub getpage
    17 {
    18 {
    18 	#arguments
    19 	#arguments
    19 	($page,$host,$auth,$myfile)=@_;
    20 	($page,$host,$auth,$myfile)=@_;
   173 }
   174 }
   174 
   175 
   175 sub parse_bklog {
   176 sub parse_bklog {
   176 	
   177 	
   177 	#arguments
   178 	#arguments
   178 	($infile,$outfile)=@_;
   179 	($infile,$outfile,$id)=@_;
   179 	$mypkg=loadfile $infile;
   180 	$mypkg=loadfile $infile;
   180 	open ( outputfile, ">>".$outfile);
   181 	open ( outputfile, ">>".$outfile);
   181 	open ( soutputfile, ">>"."summary_".$outfile);
   182 	open ( soutputfile, ">>"."summary_".$outfile);
   182 	
   183 	
   183 	if ($mypkg =~ m/index\.php\/(.*?) HTTP/sg) {
   184 	if ($mypkg =~ m/index\.php\/(.*?) HTTP/sg) {
   184 
   185   
   185 		$pagename = $1;
   186 		$pagename = $1;
   186 		print "INFO -Processing Package $pagename \n";
   187 		print "INFO -Processing Package $pagename \n";
   187 		$i=0;
   188 		$i=0;
   188 		#while ($mypkg =~ m/\<tr\>\<td\>(.*?)\<\/td\>/g) {
   189 		
   189 		while ($mypkg =~ m/\<tr\>(.*?)\<\/tr/sg) {
   190 		while ($mypkg =~ m/\<tr.*?\>(.*?)\<\/tr/sg) {
   190 			
   191 			next if ($& =~ m/style=\"background-color\:/s);
   191 			$myfeat= $1;
   192 			$myfeat= $1;
   192 			$myfeat =~ s/\<\/td\>/\t/sg;
   193 			$myfeat =~ s/\<\/td\>/\t/sg;
   193 			$myfeat =~ s/\<.*?\>//sg;
   194 			$myfeat =~ s/\<.*?\>//sg;
   194 			$myfeat =~ s/\n//sg;
   195 			$myfeat =~ s/\n//sg;
   195 			
   196 			
   196 			if ($myfeat =~ m/[A-z]/sg ) {
   197 			
       
   198 			if ($myfeat =~ m/[A-z]/sg and not $myfeat =~ m/\&lt\;etc/sg and 
       
   199 			not $myfeat =~ m/\&lt\;Feature/sg and not $myfeat =~ m/Item not available/sg) {
   197 				print outputfile "$pagename\t$myfeat\n";
   200 				print outputfile "$pagename\t$myfeat\n";
   198 				$i++;
   201 				$i++;
   199 			}
   202 			}
   200 			
   203 			
   201 		}
   204 		}
   202 
   205 
   203 	print soutputfile "$pagename\t$i\n";
   206 	print soutputfile "$id\t$pagename\t$i\thttp://developer.symbian.org/wiki/index.php/$pagename\n";
   204 	
   207 	
   205 	}
   208 
   206 
   209 	}
   207 
   210 
   208 
   211 	close (outputfile);
   209 
   212 	close (soutputfile);
   210 }
   213 
       
   214 
       
   215 }
       
   216 
       
   217 
       
   218 
   211 
   219 
   212 #help print
   220 #help print
   213 sub printhelp
   221 sub printhelp
   214 {
   222 {
   215 
   223 
   216 	print "\n\n version 0.4 
   224 	print "\n\n version 0.5 
   217 	\ngettd.pl -t=url -d=domain \nrequired parameters:\n\t -t url containing the technology domain roadmap\n\t -d the technology domain name
   225 	\ngettd.pl -t=url -d=domain \n\nrequired parameters:\n\t -t url containing the technology domain roadmap\n\t -d the technology domain name
   218 	\n Optional parameters\n\t-o filename ,the output is logged into the output.csv file by default\n\t-h for help
   226 	\n\nOptional parameters\n\t-o filename ,the output is logged into the output.csv file by default\n\t-h for help
   219 	\n\t-a setup authorisation by cookie follow instructions in http://developer.symbian.org/wiki/index.php/Roadmap_merger_script#Cookies
   227 	\n\t-a setup authorisation by cookie follow instructions \n\tin http://developer.symbian.org/wiki/index.php/Roadmap_merger_script#Cookies
   220 	\n\t -p adds support for package backlog analysis. just run gettd.pl -p";
   228 	\n\t -p adds support for package backlog analysis. just run gettd.pl -p
       
   229 	\n\t -compare [f1] [f2] compares two package summary files for changes ignores order\n";
   221 	exit;
   230 	exit;
   222 }
   231 }
       
   232 
       
   233 
       
   234 
       
   235 #compare bklogs
       
   236 sub compare_bklogs {
       
   237 	#arguments
       
   238 	(@bklogs)=@_;
       
   239 	
       
   240 	if (not $#bklogs == 1) { printhelp;}
       
   241 
       
   242 	
       
   243 	$cmd ="cut -f 2,3 ". $bklogs[0] . " | sort -u > tmp1.txt";
       
   244 	
       
   245 	system($cmd);
       
   246 	
       
   247 	$cmd ="cut -f 2,3 ". $bklogs[1] . " | sort -u > tmp2.txt";
       
   248 	system($cmd);
       
   249 	
       
   250 	exec ("diff tmp1.txt tmp2.txt | grep '[<|>]'");
       
   251 	system("rm temp*.txt");
       
   252 	
       
   253 	exit;
       
   254 
       
   255 }
       
   256 
       
   257 
   223 
   258 
   224 
   259 
   225 #process command line options
   260 #process command line options
   226 sub cmd_options
   261 sub cmd_options
   227 {
   262 {
   228 
   263 
   229   my $help;
   264   my $help;
   230 
   265   my @compare;
   231 
   266 
   232   GetOptions('h' => \$help,'t=s'=> \$target_url, 'd=s' => \$tdomain , 'o=s' => \$csvfile, 'a' => \$authon , 'p' => \$ispackage);
   267 
       
   268   GetOptions('h' => \$help,'t=s'=> \$target_url, 'd=s' => \$tdomain , 'o=s' => \$csvfile, 
       
   269 	'a' => \$authon , 'p' => \$ispackage, 'compare=s{2}' =>\@compare);
       
   270 
       
   271   if (@compare) {
       
   272 	compare_bklogs @compare;
       
   273 	
       
   274   }
   233 
   275 
   234   if ($help) {
   276   if ($help) {
   235     printhelp;
   277     printhelp;
   236   }
   278   }
       
   279 
   237 
   280 
   238  if ($ispackage) {
   281  if ($ispackage) {
   239 
   282 
   240  	$tdomain =" ";
   283  	$tdomain =" ";
   241 	$target_url = "http://developer.symbian.org/wiki/index.php/Category:Package_Backlog";
   284 	$target_url = "http://developer.symbian.org/wiki/index.php/Category:Package_Backlog";
   257 
   300 
   258  	print "\nINFO-downloading $target_url with label $tdomain\n";
   301  	print "\nINFO-downloading $target_url with label $tdomain\n";
   259   
   302   
   260 
   303 
   261  if (not $csvfile) {
   304  if (not $csvfile) {
   262 	$csvfile="output.csv";
   305 	if (not $ispackage) { 
       
   306 		$csvfile="output.csv";
       
   307 		system ("rm output.csv");
       
   308 	} else {
       
   309 		$csvfile="output.txt";
       
   310 		system ("rm *output.txt");
       
   311 
       
   312 	}
   263  }
   313  }
   264  print "\nINFO-output recorded in $csvfile \n";
   314  print "\nINFO-output recorded in $csvfile \n";
   265 
   315 
   266 
   316 
   267 
   317 
   286 	getpage($target_url, $host1, $auth, "debug.txt");
   336 	getpage($target_url, $host1, $auth, "debug.txt");
   287 	@bklog = parse_category("debug.txt");
   337 	@bklog = parse_category("debug.txt");
   288 	$j=0;
   338 	$j=0;
   289 	foreach (@bklog) {
   339 	foreach (@bklog) {
   290 		getpage("http://".$host1.$_, $host1, $auth, "pkg".$j.".txt");
   340 		getpage("http://".$host1.$_, $host1, $auth, "pkg".$j.".txt");
   291 		parse_bklog ("pkg".$j.".txt",$csvfile);
   341 		parse_bklog ("pkg".$j.".txt",$csvfile, $j);
   292 		$j++;
   342 		$j++;
   293 		
   343 		
   294 	
   344 	
   295 
   345 
   296 	}
   346 	}