Wednesday, January 27, 2010

SVN LoC and churn metrics

I wrote a small Perl script to grab svn lines-of-code metrics (added, modified, deleted) and churn (added + modified), as well as the number of files added, updated, or deleted per revision. The output is a fixed-width flat file with one row per revision: timestamp, username, revision, lines added, modified, churned, and deleted, then files added, updated, and deleted.

Using that raw data it's a quick thing to parse it any way you like, such as applying math to predict your defect rate or simply graphing it over time.

Here's the script, creatively named svnloc:


#!/usr/bin/perl
use strict;
use warnings;

# --- Tunables ---------------------------------------------------------------
my $BARSIZE  = 40;                 # width of the progress bar, in characters
my @statuses = ('A', 'U', 'D');    # per-file status letters reported in the output

# --- Positional command-line arguments --------------------------------------
my $repo     = shift(@ARGV);
my $outfile  = shift(@ARGV) || "./svnloc.txt";
my $revision = shift(@ARGV);

# --- Per-run state shared by the subs below ---------------------------------
my $latest_rev;      # highest revision number to walk, set in full-rebuild mode
my %rev_users;       # revision => user name
my %rev_dates;       # revision => date string
my %rev_changes;     # revision => { status letter => number of files }
my %rev_diff;        # revision => { added, modified, churn, removed }

# Refuse to run without a repository path that exists on disk.
my $repo_ok = defined($repo) && -e $repo;
if (!$repo_ok) {
    print <<"USAGE";
Usage: svnloc repo [outfile [revision]]
repo the path to the svn repository
outfile the path for the output file, defaults to "./svnloc.txt"
revision if specified, will append data for that revision to the output
if not specified, all data for all revisions is obtained and the file
is generated from scratch, overwriting the old file if it exists.
USAGE
    exit(1);
}

# Optional blacklist: a file in the current directory listing one revision
# number per line. Revisions listed here are skipped by rev_loop().
my $bl_filename = "svnloc.blacklist";
my @blacklist; # Don't count these revisions
if (-e $bl_filename) {
    open(my $bl_fh, '<', $bl_filename)
        or die "Cannot read blacklist '$bl_filename': $!\n";
    # Read line by line rather than slurping the whole file into a list.
    while (my $entry = <$bl_fh>) {
        # Trim surrounding whitespace (including a CR from CRLF files) so
        # the entry compares equal to a bare revision number.
        $entry =~ s/\A\s+//;
        $entry =~ s/\s+\z//;
        next if $entry eq '';
        push @blacklist, $entry;
    }
    close($bl_fh);
}

# Main driver. With a revision argument, gather that one revision and append
# a single row to the output file. Without one, walk every revision from 1 to
# the latest and regenerate the output file from scratch.
#
# The bareword handle OUTPUT is kept on purpose: output_line() prints to it.
if (defined $revision) { # get info for our revision and append to output file
    get_info($revision);
    open(OUTPUT, '>>', $outfile)
        or die "Cannot append to '$outfile': $!\n";
    output_line($revision);
    # Buffered write errors only surface at close, so check it.
    close(OUTPUT) or die "Error closing '$outfile': $!\n";
} else { # generate output file from scratch
    my $history = `svnlook history $repo`;
    $history = '' unless defined $history;
    # The first run of digits in the history listing is taken as the latest
    # revision number.
    ($latest_rev) = $history =~ /(\d+)/s;
    die "Could not determine the latest revision of '$repo'\n"
        unless defined $latest_rev;
    print "Latest revision: $latest_rev\n";

    rev_loop("Obtaining revision information...",\&get_info);

    open(OUTPUT, '>', $outfile)
        or die "Cannot write '$outfile': $!\n";
    # One conversion per column, with the same widths as the data rows
    # written by output_line(): 20,18,6, four 7-wide line counters, and one
    # 5-wide column per status letter.
    printf OUTPUT ("%-20s%-18s%6s%7s%7s%7s%7s%5s%5s%5s\n",
        "Date","Username","Rev","Add","Mod","Chrn","Del",@statuses);

    rev_loop("Generating outputfile ($outfile)...",\&output_line);

    close(OUTPUT) or die "Error closing '$outfile': $!\n";
}
print "Finished.\n";

# get_info($rev)
#
# Runs svnlook against $repo for revision $rev and records the results in the
# file-level hashes:
#   %rev_users / %rev_dates - first and second line of "svnlook info"
#   %rev_changes            - number of changed paths per leading status
#                             character of "svnlook changed"
#   %rev_diff               - line counters derived from "svnlook diff":
#                             added, modified, churn (added + modified)
#                             and removed
#
# Line classification is a heuristic over the diff text: within a run of
# '-' lines followed by '+' lines, each '+' is paired with one pending '-'
# and counted as a modification; unpaired '+' lines are additions and
# unpaired '-' lines are deletions.
sub get_info {
    my $rev = shift;

    my $info = `svnlook info -r $rev $repo`;
    my ($user, $date) = split(/\n/,$info);
    $rev_users{$rev} = $user;
    $rev_dates{$rev} = $date;

    my $changed = `svnlook changed -r $rev $repo`;
    for my $s (split(/\n/,$changed)) {
        my $status = substr($s,0,1);
        $rev_changes{$rev}->{$status}++;
    }

    my $diff = `svnlook diff -r $rev $repo`;

    my ($added,$modified,$deleted,$temp_deleted) = (0)x4;
    for my $line (split(/\n/,$diff)) {
        # Skip the "--- path" / "+++ path" file header lines. Requiring the
        # trailing space keeps removed/added lines whose own text starts
        # with '-' or '+' (e.g. "---" or "++i;") in the count.
        # NOTE(review): a changed line whose text starts with "-- " or
        # "++ " is still indistinguishable from a header here.
        next if $line =~ /\A(?:---|\+\+\+) /;

        my $c = substr($line,0,1);
        if ($c eq '-') {
            $temp_deleted++;          # may turn out to be a modification
        } elsif ($c eq '+') {
            if ($temp_deleted) {
                $temp_deleted--;      # pair with a pending removal
                $modified++;
            } else {
                $added++;
            }
        } else {
            # Any other line ends the current run: removals that were not
            # paired with an addition are real deletions.
            $deleted += $temp_deleted;
            $temp_deleted = 0;
        }
    }
    # Flush removals still pending when the diff ends on '-' lines; split()
    # drops trailing empty fields, so no closing line arrives to do it.
    $deleted += $temp_deleted;

    $rev_diff{$rev}->{added} = $added;
    $rev_diff{$rev}->{modified} = $modified;
    $rev_diff{$rev}->{churn} = $added + $modified;
    $rev_diff{$rev}->{removed} = $deleted;

}

# output_line($rev)
#
# Writes the fixed-width row for revision $rev to the OUTPUT filehandle:
# date (first 20 characters), user, revision, the four line counters and one
# file counter per entry in @statuses.
sub output_line {
    my $rev = shift;

    # Counters that were never set for this revision are undef; let them
    # print as 0 / empty without warnings.
    no warnings 'uninitialized';

    my $stamp       = substr($rev_dates{$rev}, 0, 20);
    my @line_counts = map { $rev_diff{$rev}->{$_} } qw(added modified churn removed);
    my @file_counts = map { $rev_changes{$rev}->{$_} } @statuses;

    printf OUTPUT ("%20s%-18s%6d%7d%7d%7d%7d%5d%5d%5d\n",
        $stamp,
        $rev_users{$rev},
        $rev,
        @line_counts,
        @file_counts);
}

# rev_loop($msg, $code)
#
# Prints $msg, then calls $code->($rev) for every revision from 1 to
# $latest_rev that is not listed in @blacklist, drawing a progress bar as it
# goes.
sub rev_loop {
    my $msg  = shift;
    my $code = shift;
    my $progress;

    print "$msg\n";
    start_progress(\$progress);

    # $_ is deliberately the loop variable: tick_progress() reads it to
    # show the current revision next to the bar.
    for (1..$latest_rev) {
        tick_progress(\$progress, $latest_rev);
        $code->($_) unless is_in($_, @blacklist);
    }

    end_progress();
}

# start_progress(\$counter)
#
# Resets the caller's progress counter to 0 and draws an empty bar, leaving
# the cursor at the start of the line so the next draw overwrites it.
sub start_progress {
    my ($progress) = @_;
    ${$progress} = 0;

    my $blank = " " x $BARSIZE;
    print "[" . $blank . "]\r";
}

# tick_progress(\$counter, $max)
#
# Redraws the bar for the counter's current value out of $max, then advances
# the counter by one. The caller's current $_ is printed after the bar.
sub tick_progress {
    my ($progress, $max) = @_;

    # Post-increment: the bar reflects the value before this tick.
    my $fraction = ${$progress}++ / $max;
    my $ticks    = int($fraction * $BARSIZE);
    my $spaces   = $BARSIZE - $ticks;

    my $format = "[" . ("=" x $ticks) . (" " x $spaces) . "] %-10s\r";
    printf $format, $_;
}

# end_progress()
#
# Draws a completely filled bar and moves past it with a blank line.
sub end_progress {
    my $full = "=" x $BARSIZE;
    print "[" . $full . "]\n\n";
}

# is_in($item, @list)
#
# Returns 1 when $item equals (as a string) one of the elements of @list,
# and undef otherwise.
sub is_in {
    my ($item, @list) = @_;

    # Index the candidates by value for a direct lookup.
    my %seen = map { $_ => 1 } @list;

    return $seen{$item};
}