#!/usr/bin/perl -w

######################
# Documentation
######################

=head1 NAME

genTuningScale.pl - Generates performance scales with range [tuned,ideal].

=head1 SYNOPSIS

genTuningScale.pl [options] perfFolder [metric1 metric2 ...]

  Options:
    -help|?       brief help message
    -hillclimb d  hillclimb on dataset d instead of test1
    -library lib  if needed, measure indiv model perf on lib.
    -man          full documentation
    -shotgun jar  measure perf w/ given jar file (if needed)
    -test t       test performance on dataset t instead of test2

=head1 DESCRIPTION

Generates a performance scale for each of the specified metrics.  The bottom of the scale is the performance of the best single model (chosen separately for each metric), i.e. what parameter tuning alone achieves; the top of the scale is the ideal loss of 0.  If no metrics are given, the script generates scales for the metrics listed in Metrics.pm (which are fairly standard metrics).

The script expects to find shotgun output files in perfFolder with the names perf.singleton.test1 and perf.singleton.test2 (or the data set names given with -hillclimb and -test); performance values in these files should be reported as loss (i.e., 0 good, 1 bad).  However, if the perf.singleton file for the hillclimb set does not exist, the script can generate perf.singleton.* by running shotgun using the -library and -shotgun options.  Generated files will be moved to perfFolder.

Output has the format:
   # metric bottom top
   acc 0.24817 0
   fsc 1.00000 0
   ...

In particular, the output is reported in terms of error/loss so that lower numbers are always better.

=head1 OPTIONS

=over 3

=item B<-help|?>

Prints a brief help message and exits.

=item B<-hillclimb> d

Dataset d should be used for hillclimbing and best-model selection.  If not given, set test1 is used by default.

=item B<-library> lib

If the performance of individual models cannot be found, run shotgun using lib as the model library to calculate the individual model performances.  See also -shotgun option.

=item B<-man>

Prints the manual page and exits.

=item B<-shotgun> jar

Use given shotgun jar file to calculate individual model performances.  This is only needed if the -library option is used.

=item B<-test> t

Measure the best model's performance on test set t instead of test2.

=back

=cut

#################################
# Implementation
#################################

use strict;
use Getopt::Long;
use Pod::Usage;
use FindBin;
use lib $FindBin::Bin;
use ShotgunResults;
use Metrics;

# Option defaults; each may be overridden on the command line.
my $help = 0;
my $man = 0;
my $library = "";       # model library handed to shotgun if perf files are missing
my $hcSet = "test1";    # data set named in the hillclimb perf file
my $testSet = "test2";  # data set named in the test perf file
my $shotgun = "";       # shotgun jar used to generate missing perf files

# Parse options and print usage if there is a syntax error
# (or if usage is explicitly requested).  The test-set option is declared
# as "test" (the name used in the manual) with "testSet" kept as an alias,
# so the documented flag does not rely on abbreviation matching.
GetOptions("help|?" => \$help,
           "hillclimb=s" => \$hcSet,
           "library=s" => \$library,
           "man" => \$man,
           "shotgun=s" => \$shotgun,
           "test|testSet=s" => \$testSet)
    or pod2usage(-verbose => 0);

pod2usage(-verbose => 1) if $help;
pod2usage(-verbose => 2) if $man;

# Collect arguments and perform some sanity checks.

my $perfFolder = shift(@ARGV)
    or pod2usage("$0: No perfFolder argument given.");

my @metrics = @ARGV;
if (scalar(@metrics) == 0) {
    # Use standard metrics.
    @metrics = Metrics::enumMetrics();
}

my $hcFile = "$perfFolder/perf.singleton.$hcSet";
if (! (-e $hcFile)) {
    # Verify we have a model library to use to measure model perfs.
    pod2usage("$0: $hcFile does not exist; try -lib and -shotgun options")
        if ($library eq "" || $shotgun eq "");
    pod2usage("$0: could not find $library") if (! -e $library);

    # Need to try and generate results for single models.
    # List-form system() bypasses the shell, so a perfFolder containing
    # spaces or shell metacharacters is passed through as one argument.
    system("mkdir", "-p", "--", $perfFolder) == 0
        or warn "$0: mkdir -p $perfFolder failed (status $?)\n";

    my $addmetrics = Metrics::getAllAddCommands();
    my $cmd = "time java -jar $shotgun "
        . "-x -l -o singleton $addmetrics "
        . "$library $hcSet";
    print STDERR "$cmd\n";

    # The command is kept as a single shell string because $addmetrics is
    # interpolated as-is and may hold several arguments.  Backticks capture
    # the command's stdout (discarded here), keeping it off this script's
    # STDOUT where the scale table is printed.
    `$cmd`;
    warn "$0: shotgun command failed (status $?)\n" if $? != 0;

    # Move whatever perf files were produced in the current directory.
    # Failures only warn; the existence checks below decide whether the
    # script can continue.
    my @generated = glob("perf.singleton.*");
    if (@generated) {
        system("mv", "--", @generated, "$perfFolder/") == 0
            or warn "$0: could not move perf.singleton.* to $perfFolder"
                  . " (status $?)\n";
    }
    else {
        warn "$0: no perf.singleton.* files were generated\n";
    }
}

pod2usage("$0: $hcFile does not exist.") if (! (-e $hcFile));
my $testFile = "$perfFolder/perf.singleton.$testSet";
pod2usage("$0: $testFile does not exist.") if (! (-e $testFile));

# Read in result files, registering them under the labels "hc" and "test".
my $results = ShotgunResults->new();
$results->load($hcFile, $testFile, "hc", "test", 1);

# Find tuned performance for each metric.

my %scales;

for my $metric (@metrics) {
    # Pick the best single model for this metric, then look up its score
    # under the "test" label.
    my $single = $results->bestSingleModel($metric);
    my $score = $results->getScore($single, $metric, "test");

    # Set range for metric; score is baseline, ideal is always 0 for loss.
    $scales{$metric} = [$score, 0];
}

# Print out scales, one metric per line, sorted by metric name.

print "# metric bottom top\n";

for my $metric (sort keys %scales) {
    my $min = $scales{$metric}->[0];
    my $max = $scales{$metric}->[1];
    print "$metric $min $max\n";
}

