scripts/gettd.pl
changeset 28 ab800b3e45ab
parent 25 48be12a54405
--- a/scripts/gettd.pl	Wed Jan 20 12:00:05 2010 +0000
+++ b/scripts/gettd.pl	Wed Jan 27 10:52:27 2010 +0000
@@ -1,484 +1,484 @@
-#!/usr/bin/perl
-
- 
-
-use IO::Socket; 
-use Getopt::Long;
-
-
-my $target_url; #target url for the roadmap
-my $tdomain; #tag for the domain to be use in csv file
-my $csvfile; #output csv file name
-my $authon= '';	 #does it require authorisation? default is false
-
-
-my $count_target; #this value is used to pass a string to match and count on each package backlog
-my $ispackage;
-my $splitbklogs;
-my $summaryheader="ID\tPackage\tFeatures\tFormat\tHttp\n" ;
-my $newtdformat = 0;
-my @blist = ("Tracking_Package_features","Kernel_EPLization_Backlog",
-    "Package_Backlog_Q","Wishlist","Test_Package","Backlog_Details");
-
-
-sub blacklist
-{
- ($name)=@_;
-      
- foreach(@blist) {
- 
- if ( $name =~ m/$_/sg) { print "WARNING - $name is blacklisted\n"; return 1;}
- }
- 
- return 0;
-
-}
-
-
-sub getpage
-{
-	#arguments
-	($page,$host,$auth,$myfile)=@_;
-	
-	
-	#output file
-	open ( outputfile, ">".$myfile);
-	
-	
-	$port = "http(80)";
-	$getmess = "GET " . $page ." HTTP/1.1\n" . $auth;
-
-	print "INFO - sending message - $getmess\n";
-	print outputfile "$getmess\n\n";
-
-	$sock = IO::Socket::INET->new 	
-		(
-		 PeerAddr => $host,   PeerPort => $port,  Proto => 'tcp', 
-		) ;
-
- 
-	print $sock "$getmess\n\n";
-
- 
-	while(<$sock>) {
- 
-	  print outputfile $_;
- 
-	}	
-  	
-	close ($sock);
-	close (outputfile);
-}
-
-sub prntfeatures 
-{
-
-	($release,$package,$features,$myfile,$domain)=@_;
-	
-  $release =~ s/\\//sg;	
-	$csvdel ="\",\"";
-	$invcoma ="\"";
- if ($newtdformat) {
-  $package =~ s/backlog//sgi;
-  print $myfile "$invcoma $release $csvdel $domain $csvdel $package $csvdel $myfeat $invcoma\n";
-  
- } else {
-		
-	$features = $features."<dt";
-
-	
-
-	while ( $features =~ /dt\>(.*?)\<\/dt(.*?)\<dt/sg  ){
-		$myfeat = $1;
-		$subfeat =$2;
-		
-		$myfeat =~ s/\n/ /sg;
-		
-		pos($features) = pos($features) -2;
-		
-		$mystr="";
-		while ( $subfeat =~ /\<dd\>(.*?)\<\/dd\>/sg) {
-			$mysubfeat = $mysubfeat.$mystr.$1;
-			$mystr = " & ";
-		}
-		undef $mystr;
-	$mysubfeat =~ s/,/ /sg;
-	$mysubfeat =~ s/\n//sg;
-	$mysubfeat =~ s/\<.*?\>//sg;
-	
-
-	print $myfile "$invcoma $release $csvdel $domain $csvdel $package $csvdel $myfeat $csvdel $csvdel $csvdel $mysubfeat $invcoma\n";
-	
-	$mysubfeat = "";	
-	}
-		
- }
-}
-	
-sub loadfile
-{
-
-	$/ = " ";
-	#arguments
-	($myfile)=@_;
-	open ( inputfile, "<".$myfile);
-	my $contents = do { local $/;  <inputfile> };
-	close(inputfile);
-	return $contents;
-
-}
-
-sub td_roadmap
-{
-
-
-	#arguments
-	($infile,$outfile,$domain,@releases)=@_;
-
-	
-	$roadmap=loadfile $infile;
-	open ( outputfile, ">>".$outfile);
-
-
-  if ($newtdformat) {
-       print "Processing new TD roadmap format\n";
-         if ($roadmap =~ m /Contents\<\/h2\>.*?\<\/table/sg) { $roadmap =$';}
-         foreach (@releases) {
-          $exp=$_." Roadmap";
-		         
-           if ($roadmap =~ m /($exp)/sg) { 
-			     print "PASS - Found entry for $_ \n";
-			     $relroad =$';	
-			
-			     if ($roadmap =~ m /table\>(.*?)\<\/table/sg) { $relroad =$1;}
-			      $relroad =~ s/\n//sg;
-			      
-            $skipfirst =1; #skipping the header of the table         
-            while ($relroad =~ m/\<tr\>(.*?)\<\/tr(.*)/sg){
-                  $myfeat=$1;
-                  $relroad =$2;
-                  if ($skipfirst) {$skipfirst=0;next;}
-                  $package="";
-                  if  ($myfeat =~ m/title\=\"(.*?)\"/sg) {  $package=$1; } #looking for package name
-                                  
-                  $myfeat=~ s/\<\/td\>\<td\>/\",\"/sg;   
-                  $myfeat=~ s/\<.*?\>//sg;
-                 
-                 if ($myfeat =~m/[A-z]/sg ){prntfeatures($_,$package,$myfeat,outputfile,$domain);}
-                
-                 }  		     
-         }
-        }
-  } else {
-
-	 foreach (@releases) {
-		
-	 	$exp="\\<h2\\>.*?\\>".$_;
-		  
-		if ($roadmap =~ m /($exp)/sg) { 
-			print "PASS - Found entry for $_ \n";
-			$relroad =$';	
-			
-			if ($relroad =~ m /(.*?)\<h2/sg) { $relroad =$1;}
-			$i=0;	
-			while ($relroad=~ m/\<h3\>.*\>(.*?)\<.*<\/h3/g) {
-				$package = $1;		
-				$ppos[$i]= pos($relroad);
-				$pname[$i]= $package;
-				$i++;
-			}
-			for ( $i=0;$i<($#ppos); $i++){
-				$features= substr ($relroad, $ppos[$i],$ppos[$i+1]-$ppos[$i]);
-				prntfeatures($_,$pname[$i],$features,outputfile,$domain);
-			}
-			$features= substr ($relroad, $ppos[$i]);
-		
-			prntfeatures($_,$pname[$i],$features,outputfile,$domain);
-			@ppos ="";
-			@pname ="";
-			undef ($features);
-		}
-	}		 	
-
-	}
-	
-	
-
-	close (outputfile);
-
-
-}
-
-
-sub parse_category {
-
-	#arguments
-	($infile)=@_;
-
-	my @mylink;
-
-	$mypage=loadfile $infile;
-	$i=0;	
-	if ( $mypage =~ m/Pages in category(.*)\<\/table/sg) {
-		print "INFO - Category page found\n";
-		$mypage = $1;
-		
-		while ($mypage =~ m /\<a href\=\"(\/wiki\/index\.php\/.*?)\"/g) {
-			
-			$mylink[$i] = $1;	
-			$i++;
-			
-		}
-	print "INFO - Found $i items in the category page\n"
-	}
-	return @mylink;
-}
-
-sub bklog_domain {
- #argument
-($mytechdomian)=@_;
-
-  $mytechdomian =~s/\)//sg;
-  $mytechdomian =~s/\(//sg;
-  
- $domaininfo=loadfile ("package_domains.csv");
- if ($domaininfo =~ m/$mytechdomian.*?\{(.*?)\}/sgi ) {
-      return $1;
- }
-  print "ERROR - domain not found for $mytechdomian\n";
-  return "orphan";
- 
-}
-
-sub parse_bklog {
-	
-	#arguments
-	($infile,$outfile,$id)=@_;
-	$mypkg=loadfile $infile;
-	#list if the bklog has been ported to the new bugzilla based format
-  $headerformat= "wiki_format";
-  
-	
-	open ( outputfile, ">>".$outfile);
-	open ( soutputfile, ">>"."summary_".$outfile);
-	
-	if ($mypkg =~ m/index\.php\/(.*?) HTTP/sg) {
-  
-		$pagename = $1;
-		
-    if ($splitbklogs) {
-      $whichtd =  bklog_domain($pagename);
-      open ( tdoutputfile, ">>".$whichtd. "_".$outfile);
-     }
-    
-    
-    
-    if (blacklist($pagename)) {
-       
-    	  close (outputfile);
-	      close (soutputfile);
-	    
-	      if ($splitbklogs) { close (tdoutputfile);}
-        return 0;
-    }
-		print "INFO -Processing Package $pagename \n";
-		$i=0;
-		$found_counter =0;
-		
-		if ($mypkg =~m/class\=\"bugzilla sortable\"/sg ) { $headerformat="autobug_format"; }
-		
-		while ($mypkg =~ m/\<tr.*?\>(.*?)\<\/tr/sg) { 
-			$myheader= $&;
-      if ($myheader =~ m/style=\"background-color\:/sg) {
-        next;
-      }
-			$myfeat= $1;
-			$myfeat =~ s/\<\/td\>/\t/sg;
-			$myfeat =~ s/\<.*?\>//sg;
-			$myfeat =~ s/\n//sg;
-			
-			if ($myfeat =~ m/IDPStatus/sg) { #header for bugzilla mediawiki plugin
-        next;
-      }
-			
-			if ($myfeat =~ m/[A-z]/sg and not $myfeat =~ m/\&lt\;etc/sg and 
-			not $myfeat =~ m/\&lt\;Feature/sg and not $myfeat =~ m/Item not available/sg) {
-				print outputfile "$pagename\t$myfeat\n";
-				if ($splitbklogs) { print tdoutputfile "$pagename\t$myfeat\n";}
-        
-      #  print "matching $myfeat with $count_target\n"    ;
-        if ($myfeat =~ m/$count_target/sg) {$found_counter++;}
-        $i++;
-			}
-			
-		}
-  
-  if ($count_target){
-    $mycount=$i."\t".$found_counter;
-  } else {
-    $mycount=$i;
-  }
-	
-	if ($splitbklogs) { 
-    print soutputfile "$id\t$pagename\t$mycount\t$headerformat\t$whichtd\thttp://developer.symbian.org/wiki/index.php/$pagename\n";
-	} else {
-    print soutputfile "$id\t$pagename\t$mycount\t$headerformat\thttp://developer.symbian.org/wiki/index.php/$pagename\n";
-  }
-
-	}
-
-	close (outputfile);
-	close (soutputfile);
-  if ($splitbklogs) { close (tdoutputfile);}
-                           
-}
-
-
-
-
-#help print
-sub printhelp
-{
-       
-	print "\n\n version 1.1
-	\ngettd.pl -t=url -d=domain \n\nRequired parameters for Technology Roadmaps:\n\t -t url containing the technology domain roadmap\n\t -d the technology domain name
-	\n\nOptional Parmeters for Technology Roadmaps\n\t-new if the roadmap has the new wiki format
-  \n\nRequired Parameters for Package backlogs\n\t-p for package backlog analysis. just run gettd.pl -p
-  \n\nOptional Pararmeters for Package backlogs\n\t -compare [f1] [f2] compares two package summary files for changes ignores order
-  \n\t -split splits the content of the backlog output into technology domains. requires package_domains.csv file with mapping details
-  \n\t -count=regexp counts the times that a package backlog line matches the regexp, the results are output to the summary file
-  \n\nCommonOptional parameters\n\t-o filename ,the output is logged into the output.csv file by default\n\t-h for help
-	\n\t recommend to run under cygwin environment and perl version v5.10.0 \n
-  \n\t pages blacklisted for package backlogs are @blist\n";
-	exit;
-}
-
-
-
-#compare bklogs
-sub compare_bklogs {
-	#arguments
-	(@bklogs)=@_;
-	
-	if (not $#bklogs == 1) { printhelp;}
-
-	
-	$cmd ="cut -f 2,3 ". $bklogs[0] . " | sort -u > tmp1.txt";
-	
-	system($cmd);
-	
-	$cmd ="cut -f 2,3 ". $bklogs[1] . " | sort -u > tmp2.txt";
-	system($cmd);
-	
-	exec ("diff tmp1.txt tmp2.txt | grep '[<|>]'");
-	system("rm temp*.txt");
-	
-	exit;
-
-}
-
-
-
-
-#process command line options
-sub cmd_options
-{
-
-  my $help;
-  my @compare;
-
-
-  GetOptions('h' => \$help,'t=s'=> \$target_url, 'd=s' => \$tdomain , 'o=s' => \$csvfile, 
-	'a' => \$authon , 'p' => \$ispackage, 'compare=s{2}' =>\@compare, 'new' => \$isnewformat,
-   'split' => \$splitbklogs, 'count=s' => \$count_target);
-
-  if (@compare) {
-	compare_bklogs @compare;
-	
-  }
-  if ($count_target) {
-    print "INFO - Seaching for $count_target\n";
-  }
-
-  if ($help) {
-    printhelp;
-  }
-
-
- if ($ispackage) {
-
- 	$tdomain =" ";
-	$target_url = "http://developer.symbian.org/wiki/index.php/Category:Package_Backlog";
-	
- }  
- if ($isnewformat){
-    $newtdformat = 1;
- 
- }
-
- if ( not $target_url) {
-
-	print "ERROR-missing arguments target url\n";
-	printhelp;	
-  } 
-
-
- if (not $tdomain){
-	print "ERROR-missing arguments domain level\n";
-	printhelp;
- }
-
- 	print "\nINFO-downloading $target_url with label $tdomain\n";
-  
-
- if (not $csvfile) {
-	if (not $ispackage) { 
-		$csvfile="output.csv";
-		
-	} else {
-		$csvfile="output.txt";
-		system ("rm *output.txt");
-	
-	}
- }
- print "\nINFO-output recorded in $csvfile \n";
-
-                                      
-
-}
-#main
-$/ = " ";
-$host1 = "developer.symbian.org";
-
-cmd_options();
-
-if ($authon) {
-	#file containing login details from http cookie
-	$mycookie = loadfile("mycookie.txt");
-
-	$auth = "Cookie: " . $mycookie ;
-}
-
-
-if ($ispackage) {
-	getpage($target_url, $host1, $auth, "debug.txt");
-	@bklog = parse_category("debug.txt");
-	$j=0;
-	
-	foreach (@bklog) {
-		getpage("http://".$host1.$_, $host1, $auth, "pkg".$j.".txt");
-		parse_bklog ("pkg".$j.".txt",$csvfile, $j);
-		$j++;
-		
-	
-
-	}
-
-} else {
-
-	#foundation releases - add as required
-	@releases=("Symbian\\^2","Symbian\\^3","Symbian\\^4");
-
-	getpage($target_url, $host1, $auth, "debug.txt");
-	td_roadmap("debug.txt" , $csvfile, $tdomain ,@releases);
+#!/usr/bin/perl
+
+ 
+
+use IO::Socket; 
+use Getopt::Long;
+
+
+my $target_url; #target url for the roadmap
+my $tdomain; #tag for the domain to be use in csv file
+my $csvfile; #output csv file name
+my $authon= '';	 #does it require authorisation? default is false
+
+
+my $count_target; #this value is used to pass a string to match and count on each package backlog
+my $ispackage;
+my $splitbklogs;
+my $summaryheader="ID\tPackage\tFeatures\tFormat\tHttp\n" ;
+my $newtdformat = 0;
+my @blist = ("Tracking_Package_features","Kernel_EPLization_Backlog",
+    "Package_Backlog_Q","Wishlist","Test_Package","Backlog_Details");
+
+
+sub blacklist
+{
+ ($name)=@_;
+      
+ foreach(@blist) {
+ 
+ if ( $name =~ m/$_/sg) { print "WARNING - $name is blacklisted\n"; return 1;}
+ }
+ 
+ return 0;
+
+}
+
+
+sub getpage
+{
+	#arguments
+	($page,$host,$auth,$myfile)=@_;
+	
+	
+	#output file
+	open ( outputfile, ">".$myfile);
+	
+	
+	$port = "http(80)";
+	$getmess = "GET " . $page ." HTTP/1.1\n" . $auth;
+
+	print "INFO - sending message - $getmess\n";
+	print outputfile "$getmess\n\n";
+
+	$sock = IO::Socket::INET->new 	
+		(
+		 PeerAddr => $host,   PeerPort => $port,  Proto => 'tcp', 
+		) ;
+
+ 
+	print $sock "$getmess\n\n";
+
+ 
+	while(<$sock>) {
+ 
+	  print outputfile $_;
+ 
+	}	
+  	
+	close ($sock);
+	close (outputfile);
+}
+
+sub prntfeatures 
+{
+
+	($release,$package,$features,$myfile,$domain)=@_;
+	
+  $release =~ s/\\//sg;	
+	$csvdel ="\",\"";
+	$invcoma ="\"";
+ if ($newtdformat) {
+  $package =~ s/backlog//sgi;
+  print $myfile "$invcoma $release $csvdel $domain $csvdel $package $csvdel $myfeat $invcoma\n";
+  
+ } else {
+		
+	$features = $features."<dt";
+
+	
+
+	while ( $features =~ /dt\>(.*?)\<\/dt(.*?)\<dt/sg  ){
+		$myfeat = $1;
+		$subfeat =$2;
+		
+		$myfeat =~ s/\n/ /sg;
+		
+		pos($features) = pos($features) -2;
+		
+		$mystr="";
+		while ( $subfeat =~ /\<dd\>(.*?)\<\/dd\>/sg) {
+			$mysubfeat = $mysubfeat.$mystr.$1;
+			$mystr = " & ";
+		}
+		undef $mystr;
+	$mysubfeat =~ s/,/ /sg;
+	$mysubfeat =~ s/\n//sg;
+	$mysubfeat =~ s/\<.*?\>//sg;
+	
+
+	print $myfile "$invcoma $release $csvdel $domain $csvdel $package $csvdel $myfeat $csvdel $csvdel $csvdel $mysubfeat $invcoma\n";
+	
+	$mysubfeat = "";	
+	}
+		
+ }
+}
+	
+sub loadfile
+{
+
+	$/ = " ";
+	#arguments
+	($myfile)=@_;
+	open ( inputfile, "<".$myfile);
+	my $contents = do { local $/;  <inputfile> };
+	close(inputfile);
+	return $contents;
+
+}
+
+sub td_roadmap
+{
+
+
+	#arguments
+	($infile,$outfile,$domain,@releases)=@_;
+
+	
+	$roadmap=loadfile $infile;
+	open ( outputfile, ">>".$outfile);
+
+
+  if ($newtdformat) {
+       print "Processing new TD roadmap format\n";
+         if ($roadmap =~ m /Contents\<\/h2\>.*?\<\/table/sg) { $roadmap =$';}
+         foreach (@releases) {
+          $exp=$_." Roadmap";
+		         
+           if ($roadmap =~ m /($exp)/sg) { 
+			     print "PASS - Found entry for $_ \n";
+			     $relroad =$';	
+			
+			     if ($roadmap =~ m /table\>(.*?)\<\/table/sg) { $relroad =$1;}
+			      $relroad =~ s/\n//sg;
+			      
+            $skipfirst =1; #skipping the header of the table         
+            while ($relroad =~ m/\<tr\>(.*?)\<\/tr(.*)/sg){
+                  $myfeat=$1;
+                  $relroad =$2;
+                  if ($skipfirst) {$skipfirst=0;next;}
+                  $package="";
+                  if  ($myfeat =~ m/title\=\"(.*?)\"/sg) {  $package=$1; } #looking for package name
+                                  
+                  $myfeat=~ s/\<\/td\>\<td\>/\",\"/sg;   
+                  $myfeat=~ s/\<.*?\>//sg;
+                 
+                 if ($myfeat =~m/[A-z]/sg ){prntfeatures($_,$package,$myfeat,outputfile,$domain);}
+                
+                 }  		     
+         }
+        }
+  } else {
+
+	 foreach (@releases) {
+		
+	 	$exp="\\<h2\\>.*?\\>".$_;
+		  
+		if ($roadmap =~ m /($exp)/sg) { 
+			print "PASS - Found entry for $_ \n";
+			$relroad =$';	
+			
+			if ($relroad =~ m /(.*?)\<h2/sg) { $relroad =$1;}
+			$i=0;	
+			while ($relroad=~ m/\<h3\>.*\>(.*?)\<.*<\/h3/g) {
+				$package = $1;		
+				$ppos[$i]= pos($relroad);
+				$pname[$i]= $package;
+				$i++;
+			}
+			for ( $i=0;$i<($#ppos); $i++){
+				$features= substr ($relroad, $ppos[$i],$ppos[$i+1]-$ppos[$i]);
+				prntfeatures($_,$pname[$i],$features,outputfile,$domain);
+			}
+			$features= substr ($relroad, $ppos[$i]);
+		
+			prntfeatures($_,$pname[$i],$features,outputfile,$domain);
+			@ppos ="";
+			@pname ="";
+			undef ($features);
+		}
+	}		 	
+
+	}
+	
+	
+
+	close (outputfile);
+
+
+}
+
+
+sub parse_category {
+
+	#arguments
+	($infile)=@_;
+
+	my @mylink;
+
+	$mypage=loadfile $infile;
+	$i=0;	
+	if ( $mypage =~ m/Pages in category(.*)\<\/table/sg) {
+		print "INFO - Category page found\n";
+		$mypage = $1;
+		
+		while ($mypage =~ m /\<a href\=\"(\/wiki\/index\.php\/.*?)\"/g) {
+			
+			$mylink[$i] = $1;	
+			$i++;
+			
+		}
+	print "INFO - Found $i items in the category page\n"
+	}
+	return @mylink;
+}
+
+sub bklog_domain {
+ #argument
+($mytechdomian)=@_;
+
+  $mytechdomian =~s/\)//sg;
+  $mytechdomian =~s/\(//sg;
+  
+ $domaininfo=loadfile ("package_domains.csv");
+ if ($domaininfo =~ m/$mytechdomian.*?\{(.*?)\}/sgi ) {
+      return $1;
+ }
+  print "ERROR - domain not found for $mytechdomian\n";
+  return "orphan";
+ 
+}
+
+sub parse_bklog {
+	
+	#arguments
+	($infile,$outfile,$id)=@_;
+	$mypkg=loadfile $infile;
+	#list if the bklog has been ported to the new bugzilla based format
+  $headerformat= "wiki_format";
+  
+	
+	open ( outputfile, ">>".$outfile);
+	open ( soutputfile, ">>"."summary_".$outfile);
+	
+	if ($mypkg =~ m/index\.php\/(.*?) HTTP/sg) {
+  
+		$pagename = $1;
+		
+    if ($splitbklogs) {
+      $whichtd =  bklog_domain($pagename);
+      open ( tdoutputfile, ">>".$whichtd. "_".$outfile);
+     }
+    
+    
+    
+    if (blacklist($pagename)) {
+       
+    	  close (outputfile);
+	      close (soutputfile);
+	    
+	      if ($splitbklogs) { close (tdoutputfile);}
+        return 0;
+    }
+		print "INFO -Processing Package $pagename \n";
+		$i=0;
+		$found_counter =0;
+		
+		if ($mypkg =~m/class\=\"bugzilla sortable\"/sg ) { $headerformat="autobug_format"; }
+		
+		while ($mypkg =~ m/\<tr.*?\>(.*?)\<\/tr/sg) { 
+			$myheader= $&;
+      if ($myheader =~ m/style=\"background-color\:/sg) {
+        next;
+      }
+			$myfeat= $1;
+			$myfeat =~ s/\<\/td\>/\t/sg;
+			$myfeat =~ s/\<.*?\>//sg;
+			$myfeat =~ s/\n//sg;
+			
+			if ($myfeat =~ m/IDPStatus/sg) { #header for bugzilla mediawiki plugin
+        next;
+      }
+			
+			if ($myfeat =~ m/[A-z]/sg and not $myfeat =~ m/\&lt\;etc/sg and 
+			not $myfeat =~ m/\&lt\;Feature/sg and not $myfeat =~ m/Item not available/sg) {
+				print outputfile "$pagename\t$myfeat\n";
+				if ($splitbklogs) { print tdoutputfile "$pagename\t$myfeat\n";}
+        
+      #  print "matching $myfeat with $count_target\n"    ;
+        if ($myfeat =~ m/$count_target/sg) {$found_counter++;}
+        $i++;
+			}
+			
+		}
+  
+  if ($count_target){
+    $mycount=$i."\t".$found_counter;
+  } else {
+    $mycount=$i;
+  }
+	
+	if ($splitbklogs) { 
+    print soutputfile "$id\t$pagename\t$mycount\t$headerformat\t$whichtd\thttp://developer.symbian.org/wiki/index.php/$pagename\n";
+	} else {
+    print soutputfile "$id\t$pagename\t$mycount\t$headerformat\thttp://developer.symbian.org/wiki/index.php/$pagename\n";
+  }
+
+	}
+
+	close (outputfile);
+	close (soutputfile);
+  if ($splitbklogs) { close (tdoutputfile);}
+                           
+}
+
+
+
+
+#help print
+sub printhelp
+{
+       
+	print "\n\n version 1.1
+	\ngettd.pl -t=url -d=domain \n\nRequired parameters for Technology Roadmaps:\n\t -t url containing the technology domain roadmap\n\t -d the technology domain name
+	\n\nOptional Parmeters for Technology Roadmaps\n\t-new if the roadmap has the new wiki format
+  \n\nRequired Parameters for Package backlogs\n\t-p for package backlog analysis. just run gettd.pl -p
+  \n\nOptional Pararmeters for Package backlogs\n\t -compare [f1] [f2] compares two package summary files for changes ignores order
+  \n\t -split splits the content of the backlog output into technology domains. requires package_domains.csv file with mapping details
+  \n\t -count=regexp counts the times that a package backlog line matches the regexp, the results are output to the summary file
+  \n\nCommonOptional parameters\n\t-o filename ,the output is logged into the output.csv file by default\n\t-h for help
+	\n\t recommend to run under cygwin environment and perl version v5.10.0 \n
+  \n\t pages blacklisted for package backlogs are @blist\n";
+	exit;
+}
+
+
+
+#compare bklogs
+sub compare_bklogs {
+	#arguments
+	(@bklogs)=@_;
+	
+	if (not $#bklogs == 1) { printhelp;}
+
+	
+	$cmd ="cut -f 2,3 ". $bklogs[0] . " | sort -u > tmp1.txt";
+	
+	system($cmd);
+	
+	$cmd ="cut -f 2,3 ". $bklogs[1] . " | sort -u > tmp2.txt";
+	system($cmd);
+	
+	exec ("diff tmp1.txt tmp2.txt | grep '[<|>]'");
+	system("rm temp*.txt");
+	
+	exit;
+
+}
+
+
+
+
+#process command line options
+sub cmd_options
+{
+
+  my $help;
+  my @compare;
+
+
+  GetOptions('h' => \$help,'t=s'=> \$target_url, 'd=s' => \$tdomain , 'o=s' => \$csvfile, 
+	'a' => \$authon , 'p' => \$ispackage, 'compare=s{2}' =>\@compare, 'new' => \$isnewformat,
+   'split' => \$splitbklogs, 'count=s' => \$count_target);
+
+  if (@compare) {
+	compare_bklogs @compare;
+	
+  }
+  if ($count_target) {
+    print "INFO - Seaching for $count_target\n";
+  }
+
+  if ($help) {
+    printhelp;
+  }
+
+
+ if ($ispackage) {
+
+ 	$tdomain =" ";
+	$target_url = "http://developer.symbian.org/wiki/index.php/Category:Package_Backlog";
+	
+ }  
+ if ($isnewformat){
+    $newtdformat = 1;
+ 
+ }
+
+ if ( not $target_url) {
+
+	print "ERROR-missing arguments target url\n";
+	printhelp;	
+  } 
+
+
+ if (not $tdomain){
+	print "ERROR-missing arguments domain level\n";
+	printhelp;
+ }
+
+ 	print "\nINFO-downloading $target_url with label $tdomain\n";
+  
+
+ if (not $csvfile) {
+	if (not $ispackage) { 
+		$csvfile="output.csv";
+		
+	} else {
+		$csvfile="output.txt";
+		system ("rm *output.txt");
+	
+	}
+ }
+ print "\nINFO-output recorded in $csvfile \n";
+
+                                      
+
+}
+#main
+$/ = " ";
+$host1 = "developer.symbian.org";
+
+cmd_options();
+
+if ($authon) {
+	#file containing login details from http cookie
+	$mycookie = loadfile("mycookie.txt");
+
+	$auth = "Cookie: " . $mycookie ;
+}
+
+
+if ($ispackage) {
+	getpage($target_url, $host1, $auth, "debug.txt");
+	@bklog = parse_category("debug.txt");
+	$j=0;
+	
+	foreach (@bklog) {
+		getpage("http://".$host1.$_, $host1, $auth, "pkg".$j.".txt");
+		parse_bklog ("pkg".$j.".txt",$csvfile, $j);
+		$j++;
+		
+	
+
+	}
+
+} else {
+
+	#foundation releases - add as required
+	@releases=("Symbian\\^2","Symbian\\^3","Symbian\\^4");
+
+	getpage($target_url, $host1, $auth, "debug.txt");
+	td_roadmap("debug.txt" , $csvfile, $tdomain ,@releases);
 }
\ No newline at end of file