Creating "code_churn" directory in utilities, and adding perl scripts for code churn tool
author Chetan Kapoor <chetank@symbian.org>
Mon, 14 Sep 2009 13:40:02 +0100
changeset 49 c0d2a34bf681
parent 48 8e73266ba54f
child 50 a5ee079f00dd
Creating "code_churn" directory in utilities, and adding perl scripts for code churn tool
code_churn/churn_core.pl
code_churn/fbf_churn.pl
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/code_churn/churn_core.pl	Mon Sep 14 13:40:02 2009 +0100
@@ -0,0 +1,567 @@
+#!perl -w
+
+# Copyright (c) 2009 Symbian Foundation Ltd
+# This component and the accompanying materials are made available
+# under the terms of the License "Eclipse Public License v1.0"
+# which accompanies this distribution, and is available
+# at the URL "http://www.eclipse.org/legal/epl-v10.html".
+#
+# Initial Contributors:
+# Symbian Foundation Ltd - initial contribution.
+# 
+# Contributors:
+#
+# Description:
+#
+
+use strict;
+use File::Find;
+use File::Copy;
+use Cwd;
+
+sub diffstat();
+
+# Command-line arguments:
+#   ARGV[0]  directory that receives the temporary copies, logs and CSV reports
+#   ARGV[1]  left (old) snapshot directory
+#   ARGV[2]  right (new) snapshot directory
+# The snapshot directories are expected to be named <package>.<changeset>.
+if (@ARGV < 3)
+{
+    die("Usage: churn_core.pl <logs_dir> <left_dir> <right_dir>\n");
+}
+
+my $Logs_Dir = $ARGV[0];
+my $dir_left = $ARGV[1];
+my $dir_right = $ARGV[2];
+
+# Check the output location before anything is created underneath it.
+if (! -d $Logs_Dir)
+{
+    die("$Logs_Dir does not exist \n");
+}
+
+# Filtered copies of both snapshots are built under the logs directory
+# (process_files fills them in) and are diffed once both trees are complete.
+my $dir_tmp_left = $Logs_Dir.'\\'.$dir_left;
+my $dir_tmp_right = $Logs_Dir.'\\'.$dir_right;
+
+print "left changeset $dir_left\n";
+print "right changeset $dir_right\n";
+mkdir $dir_tmp_left;
+mkdir $dir_tmp_right;
+
+# default inclusions from churn.pl are "*.cpp", "*.c", "*.cxx", "*.h", "*.hpp", "*.inl" 
+my @file_pattern=('\.cpp$','\.c$','\.hpp$','\.h$','\.inl$','\.cxx$','\.hrh$');
+my $totallinecount=0;
+my $countcomments=0;
+
+# Split <package>.<changeset> into its parts. The captures are taken from the
+# match result directly, so a failed match can never pick up a stale $1.
+my ($package_name, $changeset_left) = $dir_left =~ m/^(\w+)\.([0-9a-fA-F]+)/;
+my ($changeset_right) = $dir_right =~ m/^\w+\.([0-9a-fA-F]+)/;
+
+if (!defined $package_name)
+{
+    warn("$dir_left is not of the form <package>.<changeset>\n");
+    $package_name = 'unknown';
+    $changeset_left = 'unknown';
+}
+
+if (!defined $changeset_right)
+{
+    warn("$dir_right is not of the form <package>.<changeset>\n");
+    $changeset_right = 'unknown';
+}
+
+print "\nWorking on package: $package_name\n";
+print "\nProcessing $dir_left\n";
+find(\&process_files, $dir_left);
+#DEBUG INFO:
+print "\nTotal linecount for changed files in $dir_left is $totallinecount\n";
+my $code_size_left = $totallinecount;
+
+$totallinecount=0;
+print "\nProcessing $dir_right\n";
+find(\&process_files, $dir_right);
+#DEBUG INFO:
+print "\nTotal linecount for changed files in $dir_right is $totallinecount\n";    
+my $code_size_right = $totallinecount;
+
+my @diffs;
+
+# Only diff when BOTH filtered trees exist. The paths are quoted so that a
+# logs directory containing spaces is still passed to diff as one argument.
+if (-d $dir_tmp_left && -d $dir_tmp_right)
+{
+    @diffs = `diff -r -N "$dir_tmp_left" "$dir_tmp_right"`;
+}
+
+# diffstat() reads the raw diff back from this file.
+my $diffsfile = $Logs_Dir.'\\'."dirdiffs.out";
+open (my $diffs_fh, '>', $diffsfile) or die("Couldn't open $diffsfile for writing: $!\n");
+print $diffs_fh @diffs;
+close ($diffs_fh) or die("Couldn't finish writing $diffsfile: $!\n");
+
+diffstat();
+
+# rmdir is a cmd.exe built-in and needs backslash-separated paths.
+$dir_tmp_left =~ s{/}{\\}g;
+$dir_tmp_right =~ s{/}{\\}g;
+
+if (-d $dir_tmp_left)
+{
+    system("rmdir /S /Q \"$dir_tmp_left\"");
+}
+
+if (-d $dir_tmp_right)
+{
+    system("rmdir /S /Q \"$dir_tmp_right\"");
+}
+
+unlink $diffsfile;
+unlink "$Logs_Dir\\line_count_newdir.txt";
+
+print "\n** Finished processing $package_name **\n\n\n\n\n";
+
+exit(0);
+
+# Summarise the recursive diff stored in $diffsfile.
+#
+# Reads the file-level variables $diffsfile, $Logs_Dir, $package_name, $dir_left,
+# $dir_right, $changeset_left and $changeset_right. Takes no arguments and
+# returns nothing useful.
+#
+# Output:
+#   - a summary on STDOUT
+#   - <Logs_Dir>\<package>_diffstat.csv, one row per changed file (rewritten each run)
+#   - <Logs_Dir>\diffstat_summary.csv, one row per run (created with a header
+#     row on first use, appended to afterwards)
+#
+# Package churn is reported as changed lines plus added lines.
+sub diffstat()
+{
+    open(my $diffs_fh, '<', $diffsfile) or die "Couldn't open $diffsfile: $!";
+
+    my $curfile = "";
+    my %changes = ();
+
+    while (my $line = <$diffs_fh>)
+    {
+        # diff -r -N D:/mirror\fbf_churn_output\commsfw.000000000000\serialserver\c32serialserver\Test\te_C32Performance\USB PC Side Code\resource.h
+        # diff -r <anything><changeset(12 chars)><slash><full_filename><optional_whitespace><EOL>
+        # The leading .* is greedy, so the name is taken from the last (right-hand)
+        # path on the line; the lazy capture leaves trailing whitespace out of it.
+        if ($line =~ m/^diff -r.*\.[A-Fa-f0-9]{12}[\/\\](.*?)\s*$/)
+        {
+            $curfile = $1;
+            #DEBUG INFO:
+            #print "\t$curfile\n";
+            if (!defined $changes{$curfile})
+            {
+                $changes{$curfile} = {'a'=>0,'c'=>0,'d'=>0,'filetype'=>'unknown'};
+            }
+
+            # Only overwrite the file type when the name really has an extension;
+            # otherwise the existing value ('unknown' by default) is kept.
+            if ($curfile =~ m/\.(\w+)$/)
+            {
+                $changes{$curfile}->{'filetype'} = uc($1);
+            }
+        }
+        elsif ($line =~ m/^(\d+)(?:,(\d+))?d\d+(?:,\d+)?/)
+        {
+            # Deletion "L1[,L2]dR": count the lines removed from the left file.
+            $changes{$curfile}->{'d'} += (defined $2) ? ($2 - $1) + 1 : 1;
+        }
+        elsif ($line =~ m/^\d+(?:,\d+)?([ac])(\d+)(?:,(\d+))?/)
+        {
+            # Addition or change "L[,L]aR1[,R2]" / "L[,L]cR1[,R2]": count the
+            # lines on the right-hand side.
+            $changes{$curfile}->{$1} += (defined $3) ? ($3 - $2) + 1 : 1;
+        }
+    }
+
+    close($diffs_fh);
+
+    # Per-filetype totals. The known types are pre-seeded so that they always
+    # appear as columns in the summary CSV, even when nothing changed.
+    my %package_changes = ("CPP"=>0, "H"=>0, "HPP"=>0, "INL"=>0, "C"=>0, "CXX"=>0,"HRH"=>0,);
+    my %package_deletions = ("CPP"=>0, "H"=>0, "HPP"=>0, "INL"=>0, "C"=>0, "CXX"=>0,"HRH"=>0,);
+    my %package_additions = ("CPP"=>0, "H"=>0, "HPP"=>0, "INL"=>0, "C"=>0, "CXX"=>0,"HRH"=>0,);
+
+    for my $file (keys %changes)
+    {
+        my $filetype = $changes{$file}->{'filetype'};
+        $package_changes{$filetype} += $changes{$file}->{'c'};
+        $package_deletions{$filetype} += $changes{$file}->{'d'};
+        $package_additions{$filetype} += $changes{$file}->{'a'};
+    }
+
+    my $overall_changes = 0;
+    $overall_changes += $_ for (values %package_changes);
+
+    my $overall_deletions = 0;
+    $overall_deletions += $_ for (values %package_deletions);
+
+    my $overall_additions = 0;
+    $overall_additions += $_ for (values %package_additions);
+
+    my $package_churn = $overall_changes + $overall_additions;
+
+    # The same totals go to the console and to the head of the per-package CSV.
+    my @totals = (
+        "Total Lines Changed = $overall_changes\n",
+        "Total Lines Added = $overall_additions\n",
+        "Total Lines Deleted = $overall_deletions\n",
+    );
+
+    print "\n\n\n\nSummary for Package: $package_name\n";
+    print "-------------------\n";
+    print "Changesets Compared: $dir_left and $dir_right\n";
+    print @totals;
+    print "Package Churn = $package_churn lines\n";
+
+    # ---- per-package report: one row per changed file ----
+    my @header = qw(filetype a c d);
+
+    my $outputfile = $Logs_Dir.'\\'."$package_name\_diffstat.csv";
+    open(my $pkg_fh, '>', $outputfile) or die "Couldn't open $outputfile: $!";
+
+    print $pkg_fh " SF CODE-CHURN SUMMARY\n";
+    print $pkg_fh "Package: $package_name\n";
+    print $pkg_fh "Changesets Compared: $dir_left and $dir_right\n";
+    print $pkg_fh @totals;
+    print $pkg_fh "Package Churn = $package_churn lines\n\n\n\n\n";
+
+    # Column titles, in the same order as @header. Every field, including the
+    # last one, is followed by a comma.
+    print $pkg_fh "FILENAME,FILETYPE,LINES_ADDED,LINES_CHANGED,LINES_DELETED,\n";
+
+    foreach my $file (sort keys %changes)
+    {
+        my $entry = $changes{$file};
+        print $pkg_fh $file.",";
+        print $pkg_fh map { (defined $entry->{$_} ? $entry->{$_} : "").","; } @header;
+        print $pkg_fh "\n";
+    }
+
+    close($pkg_fh) or warn "Couldn't finish writing $outputfile: $!";
+
+    # ---- cross-package summary: one row per run ----
+    my $diffstat_summary = $Logs_Dir.'\\'."diffstat_summary.csv";
+    my $summary_is_new = (! -e $diffstat_summary);
+
+    # Append mode creates the file when it does not exist yet.
+    open(my $summary_fh, '>>', $diffstat_summary) or die "Couldn't open $diffstat_summary: $!";
+
+    if ($summary_is_new)
+    {
+        my @titles = ("PACKAGE_NAME", "LEFT_CHANGESET", "RIGHT_CHANGESET");
+        push @titles, map { $_." CHANGES" } sort keys %package_changes;
+        push @titles, map { $_." ADDITIONS" } sort keys %package_additions;
+        push @titles, map { $_." DELETIONS" } sort keys %package_deletions;
+        push @titles, ("PACKAGE_CHANGES", "PACKAGE_ADDITIONS", "PACKAGE_DELETIONS", "PACKAGE_CHURN");
+        print $summary_fh join(",", @titles).",\n";
+    }
+
+    my @row = ($package_name, $changeset_left, $changeset_right);
+    push @row, map { $package_changes{$_} } sort keys %package_changes;
+    push @row, map { $package_additions{$_} } sort keys %package_additions;
+    push @row, map { $package_deletions{$_} } sort keys %package_deletions;
+    push @row, ($overall_changes, $overall_additions, $overall_deletions, $package_churn);
+    print $summary_fh join(",", @row).",\n";
+
+    close($summary_fh) or warn "Couldn't finish writing $diffstat_summary: $!";
+}
+
+# File::Find callback: copy every source file whose name matches @file_pattern
+# into the mirror tree under $Logs_Dir, strip it with RemoveNonLOC and add the
+# resulting line count to $totallinecount.
+#
+# Reads $_ and $File::Find::name as set by File::Find; takes no arguments.
+sub process_files() 
+{
+    my $lfile = $_;
+    return if (! -f $lfile);
+
+    # Handle each file once, however many patterns it matches.
+    my $lfile_lc = lc($lfile);
+    return if (!grep { $lfile_lc =~ m/$_/ } @file_pattern);
+
+    my $lfile_fullpath = $File::Find::name;
+    $lfile_fullpath =~ s#\/#\\#g;
+    $lfile =~ s#\/#\\#g;
+
+    # We copy matching files to a separate temp directory
+    # so that the final diff can simply diff the full dir
+    # Note :  RemoveNonLOC edits the copy in place.
+    my $lfile_abs = cwd().'\\'.$lfile;
+    my $lfile_local = $Logs_Dir.'\\'.$lfile_fullpath;
+    makepath($lfile_local);
+    print "%";
+
+    # Without a copy there is nothing to strip or count, so report and move on.
+    if (!copy($lfile_abs,$lfile_local))
+    {
+        warn "\nCannot copy $lfile_abs to $lfile_local : $!\n";
+        return;
+    }
+
+    $totallinecount += RemoveNonLOC( $lfile, $lfile_local, "newdir" );
+}
+
+
+# Create every missing directory leading up to a file.
+#
+# Takes one argument: the path of a file (either slash style is accepted). The
+# last path component is treated as the file name and is not created.
+# A directory that cannot be created is reported with a warning.
+#
+# Declared without a prototype: the previous empty "()" prototype contradicted
+# the one-argument call in process_files.
+sub makepath
+{
+    my ($absfile) = @_;
+    $absfile =~ s#\\#\/#g;
+    my @dirs = split /\//, $absfile;
+    pop @dirs;  # throw away the filename
+
+    foreach my $depth (0 .. $#dirs)
+    {
+        # Joining from the first component keeps a leading "/" intact.
+        my $path = join('/', @dirs[0 .. $depth]);
+        next if ($path eq "" || -d $path);
+
+        if (!mkdir($path) && !-d $path)
+        {
+            warn "\nCannot create directory $path : $!\n";
+        }
+    }
+}
+
+
+# Reduce a source file to its lines of code, in place, and return the count.
+#
+# Arguments:
+#   $file           name of the file being processed (not used by this routine)
+#   $original_file  path of the file to rewrite
+#   $type_of_dir    suffix for the log file name, line_count_<type_of_dir>.txt
+#
+# Unless the file-level $countcomments flag is set, C and C++ style comments
+# are removed first; blank lines are always removed. The result replaces
+# $original_file, which later serves as input to diff. The line count is
+# appended to $Logs_Dir\line_count_<type_of_dir>.txt and returned.
+#
+# Returns 0, with a warning, when the file cannot be read.
+#
+# The work is done in memory, so no temporary files are left in the tree that
+# is diffed later. Declared without a prototype: the previous ($$$) prototype
+# could not be applied to the call in process_files, which precedes this
+# definition.
+sub RemoveNonLOC
+{
+    my ($file, $original_file, $type_of_dir) = @_;
+
+    # Slurp the whole file.
+    my $source_text;
+    {
+        my $input_fh;
+        if (!open($input_fh, '<', $original_file))
+        {
+            warn "\nCannot open $original_file : $!\n";
+            return 0;
+        }
+        local $/ = undef;
+        $source_text = <$input_fh>;
+        close($input_fh);
+    }
+    $source_text = "" if (!defined $source_text);
+
+    if (!($countcomments))
+    {
+        # String and character literals are matched first so that comment
+        # markers inside them are left alone.
+        my $dbl = qr/"[^"\\]*(?:\\.[^"\\]*)*"/s;
+        my $sgl = qr/'[^'\\]*(?:\\.[^'\\]*)*'/s;
+
+        my $C   = qr{/\*.*?\*/}s; # C style comments /*  */
+        my $CPP = qr{//.*}; # C++ style comments //
+        my $com = qr{$C|$CPP};
+        my $other = qr{.[^/"'\\]*}s; # all other '"
+        my $keep = qr{$sgl|$dbl|$other};
+
+        # $1 is undefined whenever the comment alternative matched, which is
+        # exactly how the comment gets dropped - hence no 'uninitialized' warnings.
+        no warnings 'uninitialized';
+        $source_text =~ s/$com|($keep)/$1/gom;
+    }
+    else
+    {
+        print("\n option --CountComments specified so comments will be included in the count\n");
+    }
+
+    # Split into lines (terminators kept) and drop the blank ones.
+    my @code_lines = grep { !/^\s*\n$/ } split(/^/m, $source_text);
+
+    # Write the stripped text back over the file.
+    #todo dont need chmod now?
+    chmod(oct("0777"), $original_file) or warn "\nCannot chmod $original_file : $!\n";
+
+    if (open(my $output_fh, '>', $original_file))
+    {
+        print $output_fh @code_lines;
+        close($output_fh) or warn "\nCannot finish writing $original_file : $!\n";
+    }
+    else
+    {
+        warn "\nCannot rewrite $original_file : $!\n";
+    }
+
+    # Record the resulting size.
+    my $linecount = scalar(@code_lines);
+    my $linecount_log = "$Logs_Dir\\line_count_$type_of_dir.txt";
+
+    if (open(my $log_fh, '>>', $linecount_log))
+    {
+        print $log_fh "\n$original_file   ";
+        print $log_fh $linecount;
+        close($log_fh);
+    }
+    else
+    {
+        warn "\nCannot append to $linecount_log : $!\n";
+    }
+
+    return $linecount;
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/code_churn/fbf_churn.pl	Mon Sep 14 13:40:02 2009 +0100
@@ -0,0 +1,149 @@
+#! perl -w
+
+# Copyright (c) 2009 Symbian Foundation Ltd
+# This component and the accompanying materials are made available
+# under the terms of the License "Eclipse Public License v1.0"
+# which accompanies this distribution, and is available
+# at the URL "http://www.eclipse.org/legal/epl-v10.html".
+#
+# Initial Contributors:
+# Symbian Foundation Ltd - initial contribution.
+# 
+# Contributors:
+#
+# Description:
+#
+
+use strict;
+use Getopt::Long;
+
+use FindBin;
+#my $churn_core = "D:\\mirror\\churn_core.pl";
+# The core script and the scratch output directory both sit next to this script.
+my $script_dir = $FindBin::Bin;
+my $churn_core = $script_dir."\\churn_core.pl";
+my $churn_output_temp = $script_dir."\\fbf_churn_output";
+mkdir $churn_output_temp;
+
+# Print $msg (when it is not empty) followed by the help text, then end the
+# script with exit status 1.
+sub Usage($)
+  {
+  my $msg = shift;
+  
+  if ($msg ne "")
+    {
+    print "$msg\n\n";
+    }
+  
+  print <<'EOF';
+
+	
+fbf_churn.pl - simple script for calculating code churn in between two revisions 
+or labels for a package. This script can also be used to calculate code size for 
+a package.
+
+When used without a package name or filter, this script runs for all the packages
+in the BOM (build-info.xml) file supplied to it. 
+
+Important: 
+  This script uses clone_all_packages.pl which clones all repositories listed in 
+  the BOM or pull changes into a previously cloned repository.
+  
+  This script uses its accompayning script churn_core.pl - which should be
+  present in the same directory as this script.
+
+Limitations:
+  If a BOM is not supplied to the script using the -bom option, then the script 
+  runs on the package locations inside both MCL and FCL producing two results
+  for a single package. For running the script for calculating code churn between 
+  two release buils (using labels) or for calculating code size for a release build,
+  it is essential that a BOM (preferably for the newer build) is passed as an 
+  argument using the -bom option.
+  
+
+Options:
+
+-o --old		old revision or label for a package/respoitory
+
+-n --new		new revision or label for a package/respoitory
+
+--rev			revision for package/respoitory - Use this while calculating code size for a single package
+			
+--label			revision tag for package or release build - Use this while calculating code size
+
+-bom --bom		build-info.xml files supplied with Symbian PDKs
+
+-verbose		print the underlying "clone_all_packages" & "hg" commands before executing them
+
+-help			print this help information
+
+-package <RE>   	only process repositories matching regular expression <RE>
+
+-filter <RE>    	only process repositories matching regular expression <RE>
+
+EOF
+  exit (1);  
+  }
+
+print "\n\n==Symbian Foundation Code Churn Tool v1.0==\n\n";
+
+# Defaults for the command-line options.
+# $old keeps the value "null" unless an old revision is supplied; the script
+# then reports code size instead of code churn.
+my $old = "null";
+my ($new, $filter, $codeline, $package, $licence, $packagelist) = ("") x 6;
+my ($verbose, $help) = (0, 0);
+
+# Run a command through system() and return its result. With -verbose the
+# command line is printed first.
+sub do_system
+	{
+	my @command = @_;
+	if ($verbose)
+		{
+		print "* ", join(" ", @command), "\n";
+		}
+	return system(@command);
+	}
+
+# Analyse the command-line parameters
+# -filter and -package are two names for the same setting.
+# "bom=s" names the option once: listing the same name twice in one
+# specification is reported as a duplicate by Getopt::Long under -w.
+if (!GetOptions(
+    "n|new-rev|new-label|label|rev=s" => \$new,
+    "o|old-rev|old-label=s" => \$old,
+    "f|filter=s" => \$filter,
+    "p|package=s" => \$filter,
+    "cl|codeline=s" => \$codeline,
+    "li|licence=s" => \$licence,
+    "bom=s" => \$packagelist,
+    "v|verbose" => \$verbose,
+    "h|help" => \$help,
+    ))
+  {
+  Usage("Invalid argument");
+  }
+  
+Usage("") if ($help);
+Usage("Too few arguments....use at least one from -n|new-rev|new-label|label|rev or -bom") if ($new eq "" && $packagelist eq "");
+#Usage("Too many arguments") if ($new ne "" && $packagelist ne "");
+
+
+# $old still holds its default when no old revision was given.
+if ($old eq 'null')
+  {
+    print "\nCode size calculation....\n";		  
+  }
+else
+  {
+    print "\nCode churn calculation....\n";		  
+  }
+
+  
+my @packagelistopts = ();
+@packagelistopts = ("-packagelist", $packagelist) if ($packagelist ne "");
+
+my @verboseopt = ();
+@verboseopt = "-v" if ($verbose);
+
+# Without an explicit new revision the %REV% placeholder is passed through.
+my $new_rev = $new;
+$new_rev = "%REV%" if ($new_rev eq "");
+
+#TO_DO: Locate clone_all_packages relative to the location of this script.
+#TO_DO: Remove references to absolute paths, change to relative paths.
+my $status = do_system("clone_all_packages.pl",@verboseopt,"-mirror","-filter","$licence.*$codeline.*$filter",@packagelistopts,"-exec","--",
+   "hg","--config","\"extensions.hgext.extdiff=\"","extdiff","-p",$churn_core,"-o",$churn_output_temp,
+   "-r","$old","-r","$new_rev");
+
+# system() returns -1 only when the command could not be started at all; that
+# is reported as a failure. Any other status is left alone.
+if ($status == -1)
+  {
+  print STDERR "\nERROR: could not run clone_all_packages.pl: $!\n";
+  exit(1);
+  }
+
+exit(0);