#!/usr/bin/perl
# Count lines of code, excluding C++ comments.  Note: it might not work
# too well with /* .. */ comments.
# Usage: loc [-s] [-b] [-c] [-t] [files]

# Option flags.  These are package globals: they are also read by the
# counting loop and by output() further down in this file.
$count_comments = 0;            # -c: leave comments in the counted text
$count_tokens = 0;              # -t: report a token count
$count_semis = 0;               # -s: report a semicolon count
$count_semis_and_braces = 0;    # -b: report semicolons plus opening braces

# Consume the leading option switches from an argument array.
#
# Takes a reference to the argument array, which is modified in place: every
# recognised switch is removed and its flag is set to 1.  Parsing stops at
# the first argument that does not start with '-', leaving it and everything
# after it in the array.  Dies with the usage message on an unknown switch.
sub parse_options
{
    my ($args) = @_;

    my %flag_for = (
        '-c' => \$count_comments,
        '-b' => \$count_semis_and_braces,
        '-s' => \$count_semis,
        '-t' => \$count_tokens,
    );

    while (@$args) {
        my $arg = $args->[0];

        if (exists $flag_for{$arg}) {
            ${ $flag_for{$arg} } = 1;
            shift @$args;
        }
        elsif ($arg =~ /^-/) {
            die "usage: loc [-s] [-b] [-c] [-t] files\n";
        }
        else {
            last;
        }
    }
}

parse_options(\@ARGV);

# With -c, comments are left in the text, so a token count would include
# comment words; reject that combination up front.
die "Cannot count tokens in comments\n" if $count_comments && $count_tokens;

# Running totals over all files, and the number of files reported so far.
$totlines = 0;
$tottokens = 0;
$totsemis = 0;
$totbraces = 0;
$count = 0;

# Name of the file currently being counted; undef until the first line of a
# file has been read (and again right after a file has been reported).
$file = undef;

# Remove // and /* .. */ comments from one line of text.
#
# Arguments: the line (without its newline) and a flag saying whether the
# line starts inside an unterminated /* .. */ comment.
# Returns a two-element list: the text with comments removed, and the flag
# saying whether a block comment is still open at the end of the line.
#
# The line is scanned left to right so that whichever comment opener comes
# first wins: a "//" inside a block comment does not hide the closing "*/",
# and a "/*" after "//" does not open a block comment.  String and character
# literals are not recognised, so comment markers inside them are still
# treated as comments.
sub _strip_comments
{
    my ($text, $in_comment) = @_;
    my $code = '';

    while ($text ne '') {
        if ($in_comment) {
            # Discard everything up to and including the terminator; if
            # there is none, the whole rest of the line is comment.
            if ($text =~ s{^.*?\*/}{}) {
                $in_comment = 0;
            }
            else {
                $text = '';
            }
        }
        elsif ($text =~ s{^(.*?)(//|/\*)}{}) {
            $code .= $1;
            if ($2 eq '//') {
                $text = '';         # line comment: drop the rest
            }
            else {
                $in_comment = 1;
            }
        }
        else {
            $code .= $text;
            $text = '';
        }
    }

    return ($code, $in_comment);
}

# Add the current file's counters to the totals, print its report line and
# mark the per-file state as finished so the next line read starts afresh.
sub _finish_file
{
    $totlines += $lines;
    $tottokens += $tokens;
    $totsemis += $semis;
    $totbraces += $braces;

    output($file, $lines, $tokens, $semis, $braces);

    $count++;
    $file = undef;
}

while (<>) {
    # $ARGV names the file the current line came from; a change means the
    # previous file is complete.
    _finish_file() if defined $file && $ARGV ne $file;

    unless (defined $file) {
        $file = $ARGV;

        $lines = 0;
        $tokens = 0;
        $semis = 0;
        $braces = 0;
        $comment = 0;           # true while inside an open /* .. */ comment
    }

    chomp;

    ($_, $comment) = _strip_comments($_, $comment) unless $count_comments;

    # Lines that are empty or whitespace-only (possibly only after comment
    # removal) are not counted.
    next unless /\S/;

    $lines++;
    $tokens += scalar(tokenize($_)) if $count_tokens;

    # tr with an empty replacement list only counts the matching characters.
    $braces += tr/{//;
    $semis += tr/;//;
}

# Report the last file, if any input was read at all.
_finish_file() if defined $file;

# A grand total is only useful when more than one file was reported.
if ($count > 1) {
    output('Total', $totlines, $tottokens, $totsemis, $totbraces);
}

# Print one report line, for a single file or for the grand total.
#
# Arguments, in order: the label to print (file name or 'Total'), the line
# count, the token count, the semicolon count and the opening-brace count.
# The line count is always shown; the other columns appear only when the
# matching global option flag ($count_tokens, $count_semis,
# $count_semis_and_braces) is set.
sub output
{
    my ($label, $nlines, $ntokens, $nsemis, $nbraces) = @_;

    # Collect the pieces first, then emit the whole line in one go.
    my @columns = ("$label: lines ", $nlines);

    push @columns, ", tokens ", $ntokens
        if $count_tokens;
    push @columns, ", semis ", $nsemis
        if $count_semis;
    push @columns, ", semis+braces ", $nbraces + $nsemis
        if $count_semis_and_braces;

    print @columns, "\n";
}

# Split one line of source text into a rough list of C/C++ tokens.
#
# Takes the line as its first argument; when called with no arguments at all
# it falls back to the current $_.  Returns the list of tokens, which in
# scalar context yields the number of tokens.
sub tokenize
{
    my $line = @_ ? $_[0] : $_;
    my @tokens;

    # Cut $line into tokens.  Do tokens with longer prefix first.
    # This might not be an exhaustive list.

    # Collapse string and character literals into single placeholder words
    # so their contents are not split any further.  Both kinds are replaced
    # in one left-to-right pass, so a quote character of one kind inside a
    # literal of the other kind (e.g. "it's") is not mistaken for a
    # delimiter, and backslash-escaped characters do not end the literal.
    $line =~ s{ (") (?: \\. | [^"\\] )* "
              |  '  (?: \\. | [^'\\] )* '
              }
              { defined $1 ? ' STRING ' : ' CHAR ' }gex;

    # Make sure brackets and separators stand alone.
    $line =~ s/(\(|\)|\[|\]|\{|\}|\.|,|;)/ $1 /g;

    # Break on whitespace, then split each chunk at word boundaries so that
    # identifiers and runs of operator characters become separate tokens.
    for my $chunk (split ' ', $line) {
        push @tokens, grep { /\S/ } split /\b/, $chunk;
    }

    # print join(" ", map { "<$_>" } @tokens), "\n";

    return @tokens;
}
