code_churn/churn_core.pl
author Pat Downey <patd@symbian.org>
Thu, 21 Jan 2010 16:55:34 +0000
changeset 143 6e81c130aa29
parent 48 c0d2a34bf681
child 157 27cf0a88d449
permissions -rw-r--r--
Add initial version of blacklist mercurial extension for use to exclude and manage known bad changesets from mercurial repositories.

#!perl -w

# Copyright (c) 2009 Symbian Foundation Ltd
# This component and the accompanying materials are made available
# under the terms of the License "Eclipse Public License v1.0"
# which accompanies this distribution, and is available
# at the URL "http://www.eclipse.org/legal/epl-v10.html".
#
# Initial Contributors:
# Symbian Foundation Ltd - initial contribution.
# 
# Contributors:
#
# Description:
#

use strict;
use File::Find;
use File::Copy;
use Cwd;

sub diffstat();

my $Logs_Dir = $ARGV[0];
my $dir_left = $ARGV[1];
my $dir_right = $ARGV[2];
my $dir_tmp_left = $ARGV[0].'\\'.$ARGV[1];
my $dir_tmp_right = $ARGV[0].'\\'.$ARGV[2];

print "left changeset $dir_left\n";
print "right chnageset $dir_right\n";
mkdir $dir_tmp_left;
mkdir $dir_tmp_right;

# default inclusions from churn.pl are "*.cpp", "*.c", "*.cxx", "*.h", "*.hpp", "*.inl" 
my @file_pattern=('\.cpp$','\.c$','\.hpp$','\.h$','\.inl$','\.cxx$','\.hrh$');
my $totallinecount=0;
my $countcomments=0;

if (! -d $Logs_Dir)
{
    die("$Logs_Dir does not exist \n");
}

$dir_left =~ m/^(\w+)\.[0-9a-fA-F]+/;
my $package_name = $1;

$dir_left =~ m/^\w+\.([0-9a-fA-F]+)/;
my $changeset_left = $1;

$dir_right =~ m/^\w+\.([0-9a-fA-F]+)/;
my $changeset_right = $1;

print "\nWorking on package: $package_name\n";
print "\nProcessing $dir_left\n";
find(\&process_files, $dir_left);
#DEBUG INFO:
print "\nTotal linecount for changed files in $dir_left is $totallinecount\n";
my $code_size_left = $totallinecount;

$totallinecount=0;
print "\nProcessing $dir_right\n";
find(\&process_files, $dir_right);
#DEBUG INFO:
print "\nTotal linecount for changed files in $dir_right is $totallinecount\n";    
my $code_size_right = $totallinecount;

my @diffs;

if (-d $dir_tmp_left && -d $dir_tmp_left)
{
	@diffs = `diff -r -N $dir_tmp_left $dir_tmp_right`;
}

my $changed_lines=@diffs;
my $diffsfile = $Logs_Dir.'\\'."dirdiffs.out";
open (DIFFS, ">$diffsfile");
print DIFFS @diffs;
close (DIFFS);

diffstat();

$dir_tmp_left =~ s{/}{\\}g;
$dir_tmp_right =~ s{/}{\\}g;

if (-d $dir_tmp_left)
{
	system("rmdir /S /Q $dir_tmp_left");
}

if (-d $dir_tmp_right)
{
system("rmdir /S /Q $dir_tmp_right");
}

unlink $diffsfile;
unlink "$Logs_Dir\\line_count_newdir.txt";

print "\n** Finished processing $package_name **\n\n\n\n\n";

exit(0);

sub diffstat()
{
open (DIFFSFILE,"$diffsfile");

my $curfile = "";
my %changes = ();

while (<DIFFSFILE>)
{
	my $line = $_;
				# diff -r -N D:/mirror\fbf_churn_output\commsfw.000000000000\serialserver\c32serialserver\Test\te_C32Performance\USB PC Side Code\resource.h 
				# diff -r <anything><changeset(12 chars)><slash><full_filename><optional_whitespace><EOL>
	if ($line =~ m/^diff -r.*\.[A-Fa-f0-9]{12}[\/\\](.*)\s*$/)
	{
		$curfile = $1;
		#DEBUG INFO:
		#print "\t$curfile\n";
		if (!defined $changes{$curfile})
		{
			$changes{$curfile} = {'a'=>0,'c'=>0,'d'=>0,'filetype'=>'unknown'};
		}
		
		$curfile =~ m/\.(\w+)$/g;
				
		#if filetype known...
		my $filetype = $+;
		
		$changes{$curfile}->{'filetype'}=uc($filetype);
	}
	elsif ($line =~ m/^(\d+)(,(\d+))?(d)\d+(,\d+)?/)
	{	
		if (defined $3)
		{
			$changes{$curfile}->{$4} += ($3-$1)+1;
		}
		else
		{
			$changes{$curfile}->{$4}++;
		}
	}
	elsif ($line =~ m/^\d+(,\d+)?([ac])(\d+)(,(\d+))?/)
	{	
		if (defined $5)
		{
			$changes{$curfile}->{$2} += ($5-$3)+1;
		}
		else
		{
			$changes{$curfile}->{$2}++;
		}	
	}
}

close (DIFFSFILE);

my %package_changes = ("CPP"=>0, "H"=>0, "HPP"=>0, "INL"=>0, "C"=>0, "CXX"=>0,"HRH"=>0,);
my %package_deletions = ("CPP"=>0, "H"=>0, "HPP"=>0, "INL"=>0, "C"=>0, "CXX"=>0,"HRH"=>0,);
my %package_additions = ("CPP"=>0, "H"=>0, "HPP"=>0, "INL"=>0, "C"=>0, "CXX"=>0,"HRH"=>0,);
my $package_churn = 0;

for my $file (keys %changes)
{
	$package_changes{$changes{$file}->{'filetype'}} += $changes{$file}->{'c'};
	$package_deletions{$changes{$file}->{'filetype'}} += $changes{$file}->{'d'};
	$package_additions{$changes{$file}->{'filetype'}} += $changes{$file}->{'a'};
}


#DEBUG INFO: For printing contents of hashes containing per filetype summary
#print "\n\n\n\n";
#print "package_changes:\n";
#print map { "$_ => $package_changes{$_}\n" } keys %package_changes;
#print "\n\n\n\n";
#print "package_deletions:\n";
#print map { "$_ => $package_deletions{$_}\n" } keys %package_deletions;
#print "\n\n\n\n";
#print "package_additions:\n";
#print map { "$_ => $package_additions{$_}\n" } keys %package_additions;



my $overall_changes = 0;
for my $filetype (keys %package_changes)
{
	$overall_changes += $package_changes{$filetype};
}

my $overall_deletions = 0;
for my $filetype (keys %package_deletions)
{
	$overall_deletions += $package_deletions{$filetype};
}

my $overall_additions = 0;
for my $filetype (keys %package_additions)
{
	$overall_additions += $package_additions{$filetype};
}


$package_churn = $overall_changes + $overall_additions;

print "\n\n\n\nSummary for Package: $package_name\n";
print "-------------------\n";
print "Changesets Compared: $dir_left and $dir_right\n";
#print "Code Size for $dir_left = $code_size_left lines\n";
#print "Code Size for $dir_right = $code_size_right lines\n";
print "Total Lines Changed = $overall_changes\n";
print "Total Lines Added = $overall_additions\n";
print "Total Lines Deleted = $overall_deletions\n";
print "Package Churn = $package_churn lines\n";

my @header = qw(filetype a c d);

my $outputfile = $Logs_Dir.'\\'."$package_name\_diffstat.csv";
open(PKGSTATCSV, ">$outputfile") or die "Coudln't open $outputfile";



print PKGSTATCSV " SF CODE-CHURN SUMMARY\n";
print PKGSTATCSV "Package: $package_name\n";
print PKGSTATCSV "Changesets Compared: $dir_left and $dir_right\n";
#print PKGSTATCSV "Code Size for $dir_left = $code_size_left lines\n";
#print PKGSTATCSV "Code Size for $dir_right = $code_size_right lines\n";
print PKGSTATCSV "Total Lines Changed = $overall_changes\n";
print PKGSTATCSV "Total Lines Added = $overall_additions\n";
print PKGSTATCSV "Total Lines Deleted = $overall_deletions\n";
print PKGSTATCSV "Package Churn = $package_churn lines\n\n\n\n\n";




# print the header
print PKGSTATCSV "FILENAME,";

foreach my $name (@header)
{
  if ($name eq 'filetype')
  {
	print PKGSTATCSV uc($name).",";
  }  
  elsif ($name eq 'a')
 {
	print PKGSTATCSV "LINES_ADDED,";
 }
  elsif ($name eq 'c')
 {
	print PKGSTATCSV "LINES_CHANGED,";
 }
  elsif ($name eq 'd')
 {
	print PKGSTATCSV "LINES_DELETED,";
 }
    
}

print PKGSTATCSV "\n";

foreach my $file (sort keys %changes)
{
  print PKGSTATCSV $file.",";
  foreach my $key (@header)
  {
    if(defined $changes{$file}->{$key})
    {
      print PKGSTATCSV $changes{$file}->{$key};
    }
    print PKGSTATCSV ",";
  }
  print PKGSTATCSV "\n";
}

close (PKGSTATCSV);



my $diffstat_summary = $Logs_Dir.'\\'."diffstat_summary.csv";

if (-e $diffstat_summary)
{ 
	open(DIFFSTATCSV, ">>$diffstat_summary") or die "Coudln't open $outputfile";
	print DIFFSTATCSV "$package_name,";
	print DIFFSTATCSV "$changeset_left,";
	print DIFFSTATCSV "$changeset_right,";
	
	#print DIFFSTATCSV ",";

	foreach my $filetype (sort keys %package_changes)
	{
		if(defined $package_changes{$filetype})
		{
		  print DIFFSTATCSV $package_changes{$filetype}.",";
		}
	}

	#print DIFFSTATCSV ",";
	
	foreach my $filetype (sort keys %package_additions)
	{
		if(defined $package_additions{$filetype})
		{
		  print DIFFSTATCSV $package_additions{$filetype}.",";
		  
		}
	}
	
	#print DIFFSTATCSV ",";
	
	foreach my $filetype (sort keys %package_deletions)
	{
		if(defined $package_deletions{$filetype})
		{
		  print DIFFSTATCSV $package_deletions{$filetype}.",";
		  #print DIFFSTATCSV ",";
		}
	}
	
	#print DIFFSTATCSV ",";
	print DIFFSTATCSV "$overall_changes,";
	print DIFFSTATCSV "$overall_additions,";
	print DIFFSTATCSV "$overall_deletions,";
	print DIFFSTATCSV "$package_churn,";

	print DIFFSTATCSV "\n";
	
	close (DIFFSTATCSV);
}
else
{
	open(DIFFSTATCSV, ">$diffstat_summary") or die "Couldn't open $outputfile";

	# print the header
	print DIFFSTATCSV "PACKAGE_NAME,";
	print DIFFSTATCSV "LEFT_CHANGESET,";
	print DIFFSTATCSV "RIGHT_CHANGESET,";

	#print DIFFSTATCSV ",";

	foreach my $name (sort keys %package_changes)
	{
		print DIFFSTATCSV $name." CHANGES,";    
	}
	#print DIFFSTATCSV ",";


	foreach my $name (sort keys %package_additions)
	{
		print DIFFSTATCSV $name." ADDITIONS,";    
	}
	#print DIFFSTATCSV ",";


	foreach my $name (sort keys %package_deletions)
	{
		print DIFFSTATCSV $name." DELETIONS,";    
	}
	#print DIFFSTATCSV ",";
	
	print DIFFSTATCSV "PACKAGE_CHANGES,";
	print DIFFSTATCSV "PACKAGE_ADDITIONS,";
	print DIFFSTATCSV "PACKAGE_DELETIONS,";
	print DIFFSTATCSV "PACKAGE_CHURN,";
	print DIFFSTATCSV "\n";
	
	
	print DIFFSTATCSV "$package_name,";
	
	print DIFFSTATCSV "$changeset_left,";
	print DIFFSTATCSV "$changeset_right,";
	
	#print DIFFSTATCSV ",";

	foreach my $filetype (sort keys %package_changes)
	{
		if(defined $package_changes{$filetype})
		{
		  print DIFFSTATCSV $package_changes{$filetype}.",";
		}
	}

	#print DIFFSTATCSV ",";
	
	foreach my $filetype (sort keys %package_additions)
	{
		if(defined $package_additions{$filetype})
		{
		  print DIFFSTATCSV $package_additions{$filetype}.",";
		  
		}
	}
	
	#print DIFFSTATCSV ",";
	
	foreach my $filetype (sort keys %package_deletions)
	{
		if(defined $package_deletions{$filetype})
		{
		  print DIFFSTATCSV $package_deletions{$filetype}.",";
		}
	}

	#print DIFFSTATCSV ",";
	print DIFFSTATCSV "$overall_changes,";
	print DIFFSTATCSV "$overall_additions,";
	print DIFFSTATCSV "$overall_deletions,";
	print DIFFSTATCSV "$package_churn,";
	
	print DIFFSTATCSV "\n";
	
	close (DIFFSTATCSV);
}



}

sub process_files() 
{
    my $lfile = $_;
    my $lfile_fullpath=$File::Find::name;
    $lfile_fullpath =~ s#\/#\\#g;
    #print "$lfile\t\tFull path $lfile_fullpath\n" ;
    if (-f $lfile)
    { 
        foreach my $regpat (@file_pattern)
        {
            if (lc($lfile) =~ m/$regpat/)
            {
                $lfile  =~ s#\/#\\#g;
                #print "Processing file $lfile (Matched $regpat) \n"; #ck
                #print `type $lfile`;
                # We copy mathching files to a separate temp directory
                # so that the final diff can simply diff the full dir
                # Note :  RemoveNoneLOC routine edits the file in-situ.
                my $lfile_abs = cwd().'\\'.$lfile;
                my $lfile_local = $Logs_Dir.'\\'.$lfile_fullpath;
                makepath($lfile_local);
                print "%";
                copy($lfile_abs,$lfile_local);
				$totallinecount += RemoveNonLOC( $lfile, $lfile_local, "newdir" );
            }
        }
    }   
}


sub makepath()
{
    my $absfile = shift; 
    $absfile =~ s#\\#\/#g;
    my @dirs = split /\//, $absfile;
    pop @dirs;  # throw away the filename
    my $path = "";
    foreach my $dir (@dirs)
    {
        $path = ($path eq "") ? $dir : "$path/$dir";
        if (!-d $path)
        {
#          print "making $path \n";
          mkdir $path;
        }
    }
}


sub RemoveNonLOC($$$) {

    # Gather arguments
    my $file = shift;
    my $original_file  = shift;
    my $type_of_dir = shift;
    
#    print("\nDebug: in ProcessFile, file is $file, full file + path is $original_file \n");
     
	# Remove comments...
	
    # Set up the temporary files that will be used to perform the processing steps
    my $temp1File = $original_file."temp1";
    my $temp2File = $original_file."temp2";
	
    open(TEMP1, "+>$temp1File");
    
    if (!($countcomments)) {
    
     	# Remove any comments from the file
		my $original_file_string;
     	open INPUT, "<", $original_file;
		{
			local $/ = undef;
			$original_file_string = <INPUT>;
		}
		close INPUT;
 
     	my $dbl = qr/"[^"\\]*(?:\\.[^"\\]*)*"/s;
        my $sgl = qr/'[^'\\]*(?:\\.[^'\\]*)*'/s;

        my $C   = qr{/\*.*?\*/}s; # C style comments /*  */
        my $CPP = qr{//.*}; # C+ style comments //
        my $com = qr{$C|$CPP};
        my $other = qr{.[^/"'\\]*}s; # all other '"
        my $keep = qr{$sgl|$dbl|$other};
     
     	#Remove the comments (need to turn off warnings on the next regexp for unititialised variable)
no warnings 'uninitialized';

        $original_file_string=~ s/$com|($keep)/$1/gom;  
        print TEMP1 "$original_file_string";

use warnings 'uninitialized';
    }
    else {
    
        print("\n option --CountComments specified so comments will be included in the count\n");
        #Just copy over original with comments still in it
		copy($original_file,$temp1File); 
    }
   	 
    close(TEMP1);
   	
 	  
    # Remove blank lines...
#   print("\nDebug: Getting rid of blank lines in \n$temp1File to produce \n$temp2File \n");
    open (TEMP1, "+<$temp1File"); # include lines + pre-processed code
    open (TEMP2, "+>$temp2File"); 
    
    while (<TEMP1>) {
		
        if (!(/^\s*\n$/)) { # if line isn't blank write it to the new file 
        print TEMP2 $_;
	}
    }
    close(TEMP1);
    close(TEMP2);
     
    #Copy the final file to the original file. This updated file will form the input to diff later.
    #todo dont need chmod now?
    chmod(oct("0777"), $original_file) or warn "\nCannot chmod $original_file : $!\n";
#   print("\nCopying $temp2File\n to \n$original_file\n");
    
    #system("copy /Y \"$temp2File\" \"$original_file\"") == 0
    #or print "\nERROR: Copy of $temp2File to $original_file failed\n";
    copy($temp2File,$original_file);
  	 
    # Store original file size
    
    open(LINECOUNT, ">>$Logs_Dir\\line_count_$type_of_dir.txt");
    open(SOURCEFILE, "<$original_file");
    
    my @source_code = <SOURCEFILE>;
    print  LINECOUNT "\n$original_file   ";
    my $linecount = scalar(@source_code);
#	print  LINECOUNT scalar(@source_code);
    print  LINECOUNT $linecount; 
     
    close(LINECOUNT);
    close(SOURCEFILE);
    
    #system("del /F /Q $Logs_Dir\\line_count_$type_of_dir.txt");

    #Delete the temporary files
    unlink($temp1File);
    unlink($temp2File);
       
    return $linecount;   
}