#!/usr/bin/perl -w
#
# Utility script to sift through shotgun results.  Avoids human impulse to
# cheat or distort the facts.
#
# M. Arthur Munson, 2005
##########################################################################

use strict;
use Getopt::Long;
use FindBin;
use lib $FindBin::Bin;
use ShotgunResults;

# Default values for the settings that the command-line options may override.
my $stem      = 'perf';     # -prefix: leading part of the result file names
my $suffix    = '.1';       # -suffix: trailing part of the result file names
my $precision = 5;          # -precision: digits printed after the decimal point
my $tuning    = 'test0';    # -tune: trial name used for the hill-climbing file
my $testing   = 'test1';    # -test: trial name used for the reported file

# Print the command-line help text to the currently selected output handle.
# The current values of $precision, $stem, $suffix, $testing and $tuning are
# interpolated into the text as the advertised defaults.
sub ShowUsage {
    my $usageText = <<"USAGE";
sift.pl [options] target directory [m1 [m2 m3 ...]]
    Sift through shotgun results and report performance for specified metrics.
  Arguments:
    target - The target metric on which shotgun hill-climbed. E.g. FSC
    directory - Folder containing shotgun results.
    m1..mN - Metrics that sift should report.
  Options:
    -ensemble  Print out models in ensemble instead of ensemble performance.
    -loss      Treat scores as loss values (smaller always better).
    -precision num
             Change the precision following decimal point in reports. Default
             is $precision.
    -prefix pre  Specify the stem prefix for the result files. Defaults to $stem.
    -suffix suff Specify suffix ending for files. Defaults to $suffix.
    -test testing Specify which trial should be used for testing performance.
             Defaults to $testing.
    -tune tuning
             Specify which trial was used for tuning performance. Defaults to
             $tuning.
    -v       Verbose mode (prints table headers).
    -?|-h  Show this message.
  Example:
    sift.pl -tune test1 -test test2 acc results/optROC acc rms roc
USAGE

    print $usageText;
}

# Boolean command-line flags; all are off unless given on the command line.
my $help = 0;           # -help / -?: show usage and exit
my $verbose = 0;        # -v: print the table header
my $lossMode = 0;       # -loss: passed through to ShotgunResults->load
my $ensembleMode = 0;   # -ensemble: list ensemble members instead of scores

# Parse the options.  GetOptions removes what it recognizes from @ARGV and
# returns false when the command line is malformed.
if (!GetOptions("help|?" => \$help,
                "ensemble" => \$ensembleMode,
                "loss" => \$lossMode,
                "precision=n" => \$precision,
                "prefix=s" => \$stem,
                "suffix=s" => \$suffix,
                "test=s" => \$testing,
                "tune=s" => \$tuning,
                "v" => \$verbose)) {
    # Malformed command line: show usage and report failure.
    ShowUsage();
    exit 1;
}

if ($help) {
    # Help was explicitly requested, so this is a successful run.
    ShowUsage();
    exit 0;
}

# The precision is spliced into a printf conversion ("%-8.<precision>f"); a
# negative value would produce an invalid conversion, so reject it up front.
if ($precision < 0) {
    print STDERR "Precision must be a non-negative integer.\n";
    ShowUsage();
    exit 1;
}

# File names (without directory) of the tuning and testing result files,
# assembled as "<stem>.<trial><suffix>", e.g. "perf.test0.1".
my $hillclimb = "$stem.$tuning$suffix";
my $results = "$stem.$testing$suffix";

# Only the target and the directory are mandatory.  The metric list is
# optional (see the usage text) and is never consulted in -ensemble mode, so
# it must not be required here.
if (@ARGV < 2) {
    print STDERR "Missing required arguments.\n";
    ShowUsage();
    exit(1);
}

my ($target, $directory, @metrics) = @ARGV;

# Both result files live inside the given directory.
$hillclimb = "$directory/$hillclimb";
$results = "$directory/$results";

# Convert all metrics to uppercase, in case they are not already.
$target = uc($target);
@metrics = map { uc($_) } @metrics;

# Read in the result files.  The tuning file is loaded under the label "hc"
# and the testing file under "test"; the exact meaning of these labels is
# defined by ShotgunResults (reportPerformances later asks for "test" scores).
my $shResults = ShotgunResults->new();
$shResults->load($hillclimb, $results, "hc", "test", $lossMode);

# Find the best ensemble iteration.
my $ensemble = $shResults->bestEnsemble($target);

# Either list the members of that ensemble or report its performance.
if ($ensembleMode) {
    enumerateEnsemble($shResults, $ensemble);
}
else {
    reportPerformances($shResults, $ensemble);
}

#################################################
# Helper functions.
#################################################

# Print every member of the given ensemble, one per line.
#   $resultSet - object providing getEnsembleMembers()
#   $chosen    - ensemble descriptor, passed through to getEnsembleMembers()
sub enumerateEnsemble {
    my ($resultSet, $chosen) = @_;

    print $_, "\n" for $resultSet->getEnsembleMembers($chosen);
}

# Print one row per requested metric (file-level @metrics) comparing the
# selected ensemble against the best single model for the target metric.
#   $shResults - results object queried for scores and models
#   $ensemble  - ensemble descriptor; element 0 is reported as the selected
#                iteration
# Each row holds: a "*" marker when smaller is better for the metric, the
# metric name, the ensemble score, the ensemble-minus-single difference, the
# single-model score and the single model's label.  In verbose mode a header
# precedes the rows; otherwise each row ends with "(iteration/max size)".
sub reportPerformances {
    my ($shResults, $ensemble) = @_;
    my $chosenIter = $ensemble->[0];

    # The header already names the selected iteration, so it is only needed
    # in verbose mode.
    print <<HEADER if $verbose;
$results:
(hillclimbed on $hillclimb to optimize $target)
(iteration $chosenIter selected by hillclimbing)
(* indicates smaller is better for metric)
METRIC\t\tOPT      GAIN      SINGLE   (MODEL)
----------------------------------------------
HEADER

    # Best single model for the target metric; element 1 is printed as the
    # model label.
    my $single = $shResults->bestSingleModel($target);
    my $singleLabel = $single->[1];
    my $ensembleLimit = $shResults->maxEnsembleSize();

    # Left-justified, width-8 float conversions with the requested precision;
    # the gain column always carries an explicit sign.
    my $scoreFmt = "%-8.${precision}f";
    my $gainFmt = "%-+8.${precision}f";
    my $rowFmt = "%s%-14s $scoreFmt $gainFmt $scoreFmt %s ";

    for my $metric (@metrics) {
        # Test-set score of the best single model, then of the best ensemble.
        my $singleScore = $shResults->getScore($single, $metric, "test");
        my $ensScore = $shResults->getScore($ensemble, $metric, "test");

        my $marker = $shResults->smallerIsBetter($metric) ? "*" : " ";

        printf($rowFmt,
               $marker,
               $metric,
               $ensScore,
               $ensScore - $singleScore,
               $singleScore,
               $singleLabel);

        # Without the header, append the iteration information to each row.
        my $rowEnd = $verbose ? "\n" : "($chosenIter/$ensembleLimit)\n";
        print $rowEnd;
    }
}
