tdroadmap_merger/gettd.pl
changeset 27 cad9fdd53748
parent 26 74210f1577f9
child 28 7dccd04e54f9
equal deleted inserted replaced
26:74210f1577f9 27:cad9fdd53748
   185 		$pagename = $1;
   185 		$pagename = $1;
   186 		print "INFO -Processing Package $pagename \n";
   186 		print "INFO -Processing Package $pagename \n";
   187 		$i=0;
   187 		$i=0;
   188 		#while ($mypkg =~ m/\<tr\>\<td\>(.*?)\<\/td\>/g) {
   188 		#while ($mypkg =~ m/\<tr\>\<td\>(.*?)\<\/td\>/g) {
   189 		while ($mypkg =~ m/\<tr\>(.*?)\<\/tr/sg) {
   189 		while ($mypkg =~ m/\<tr\>(.*?)\<\/tr/sg) {
   190 			$i++;
   190 			
   191 			$myfeat= $1;
   191 			$myfeat= $1;
   192 			$myfeat =~ s/\<\/td\>/\t/sg;
   192 			$myfeat =~ s/\<\/td\>/\t/sg;
   193 			$myfeat =~ s/\<.*?\>//sg;
   193 			$myfeat =~ s/\<.*?\>//sg;
   194 			$myfeat =~ s/\n//sg;
   194 			$myfeat =~ s/\n//sg;
   195 			print outputfile "$pagename\t$myfeat\n";
   195 			
       
   196 			if ($myfeat =~ m/[A-z]/sg ) {
       
   197 				print outputfile "$pagename\t$myfeat\n";
       
   198 				$i++;
       
   199 			}
   196 			
   200 			
   197 		}
   201 		}
   198 
   202 
   199 	print soutputfile "$pagename\t$i\n";
   203 	print soutputfile "$pagename\t$i\n";
   200 	
   204 	
   279 
   283 
   280 
   284 
   281 if ($ispackage) {
   285 if ($ispackage) {
   282 	getpage($target_url, $host1, $auth, "debug.txt");
   286 	getpage($target_url, $host1, $auth, "debug.txt");
   283 	@bklog = parse_category("debug.txt");
   287 	@bklog = parse_category("debug.txt");
   284 	$i=0;
   288 	$j=0;
   285 	foreach (@bklog) {
   289 	foreach (@bklog) {
   286 		getpage("http://".$host1.$_, $host1, $auth, "pkg".$i.".txt");
   290 		getpage("http://".$host1.$_, $host1, $auth, "pkg".$j.".txt");
   287 		parse_bklog ("pkg".$i.".txt",$csvfile);
   291 		parse_bklog ("pkg".$j.".txt",$csvfile);
   288 		$i++;
   292 		$j++;
   289 		
   293 		
   290 	
   294 	
   291 
   295 
   292 	}
   296 	}
   293 
   297