/**
*
* This file contains the main() function for GibbsILR. It also contains the
* functions that parse the command-line arguments, and it drives the core
* functions of the motif finder.
*
**/

#include "gibbsilr.h"
#include "em_alg.h"
#include "print_results.h"

//----------------------------------------------------------------------
// Print-screen parameters and errors
//----------------------------------------------------------------------

/**
 * Write the command-line usage summary to stdout, then terminate the
 * process with exit status 1. This function does not return.
 */
static
void printerr() {
	// One entry per output chunk, emitted in order.
	static const char *const usageLines[] = {
		"usage: gibbsilr <filename> -l <span> [options]\n\n",
		"Options:\n\n",
		"-s <int>      set random seed (default: different up to seconds)\n",
		"-L <int>      set rapid convergence limit (default: 10)\n",
		"-t <int>      number of runs (default: 30)\n",
		"\n",
		"-T <flt>      temperature value (default: 1.0)\n",
		"-p <flt>      pseudo-weight (default: 0.1)\n",
		"-ps <flt>     phase shift frequency (default: 0.4)\n",
		"\n",
		"-em <int>     number of iterations to run EM (default: 50)\n",
	};
	const size_t lineCount = sizeof(usageLines) / sizeof(usageLines[0]);
	size_t k;

	for(k = 0; k < lineCount; k++) {
		fputs(usageLines[k], stdout);
	}
	exit(1);
}
/**
 * Write a summary of the run configuration to gibbs->fptr.
 *
 * The summary starts with an echo of the option arguments (argv[2] onward,
 * i.e. everything after the program name and the input filename), followed
 * by dataset statistics, the sampler/EM settings, and the background
 * frequency of every alphabet symbol.
 *
 * @param gibbs  run state; gibbs->data and gibbs->numseqs must already be set
 * @param argc   argument count as received by main()
 * @param argv   argument vector as received by main()
 */
static
void printparams(Gibbs *gibbs, int argc, char **argv) {
	FILE *out = gibbs->fptr;
	int i;

	// Echo the options exactly as they were given on the command line.
	for(i = 2; i < argc; i++) {
		fprintf(out, "%s ", argv[i]);
	}
	fprintf(out, "\n");

	fprintf(out, "Filename: %s\n", gibbs->fastafile);
	fprintf(out, "Number of sequences: %d\n", gibbs->numseqs);
	fprintf(out, "Total residues: %d\n", gibbs->data->total);
	fprintf(out, "Maximum sequence length: %d\n", gibbs->data->maxSeqLen);
	fprintf(out, "\n");

	fprintf(out, "Span: %d\n", gibbs->span);
	fprintf(out, "Random seed: %ld\n", gibbs->randseed);
	fprintf(out, "Iterations plateau: %d\n", gibbs->iterPlateauLen);
	fprintf(out, "Number of runs: %d\n", gibbs->numruns);
	fprintf(out, "Number of trial iterations: %d\n", gibbs->trialIters);
	fprintf(out, "\n");

	fprintf(out, "Temperature: %s\n", (gibbs->useTemperature ? "on" : "off"));
	// The reciprocal is what is stored; invert it to show the temperature.
	fprintf(out, "Temperature value: %.2lf\n", 1.0 / gibbs->recipTemp);
	fprintf(out, "Pseudo-weight: %.2lf\n", gibbs->pseudoweight);
	fprintf(out, "Phase-shift frequency: %.2lf\n", gibbs->phaseShiftFreq);
	fprintf(out, "\n");

	fprintf(out, "EM iterations: %d\n", gibbs->emStep);
	fprintf(out, "\n");

	for(i = 0; i < gibbs->numalphas; i++) {
		fprintf(out, "Background freq of %c: %.4lf\n", numToAlpha(i),
			gibbs->data->bgfreq[i]);
	}
	fprintf(out, "\n");
}


//----------------------------------------------------------------------
// Params and Initialization
//----------------------------------------------------------------------

/**
 * Validate the parsed run parameters.
 *
 * On the first violated constraint a short diagnostic is printed, followed
 * by the usage text via printerr(), which exits the process. The function
 * returns normally only when every parameter is within bounds.
 *
 * Floating-point checks are written as !(lo <= x && x <= hi) rather than
 * (x < lo || x > hi) so that a NaN value is rejected as well: every
 * ordered comparison with NaN is false.
 *
 * @param gibbs  run state holding the parameters to validate
 */
static
void paramBoundCheck(Gibbs *gibbs) {
	if(gibbs->span < 5 || gibbs->span > MAX_MOTIF_WIDTH) {
		printf("Span out of bound\n");
		printerr();
	}
	else if(gibbs->iterPlateauLen <= 0 || gibbs->numruns <= 0) {
		printf("Rapid convergence limit and number of runs must be > 0\n");
		printerr();
	}
	else if(gibbs->emStep < 0) {
		printf("Number of EM step must be >= 0\n");
		printerr();
	}
	else if(!(gibbs->pseudoweight >= 0.0 && gibbs->pseudoweight <= 1.0)) {
		printf("Pseudo weight out of bound\n");
		printerr();
	}
	// recipTemp is the reciprocal of the temperature, so recipTemp >= 1.0
	// is equivalent to a temperature in (0.0, 1.0].
	else if(!(gibbs->recipTemp >= 1.0)) {
		printf("Temperature value must be in (0.0, 1.0]\n");
		printerr();
	}
	else if(!(gibbs->phaseShiftFreq >= 0.0 && gibbs->phaseShiftFreq <= 1.0)) {
		printf("Column shift frequency out of bound\n");
		printerr();
	}
}

static
Gibbs *parseArgsGibbs(int argc, char **argv) {
	Gibbs *gibbs = (Gibbs*) malloc(sizeof(Gibbs));
	if (argc < 3) {
		printerr();
	}
	gibbs->fastafile = argv[1];

	//defaults
	gibbs->numalphas = NUMALPHAS;
	gibbs->span = -1;
	gibbs->randseed = (long)time(NULL); //random up to seconds
	gibbs->numruns = 30;
	gibbs->iterPlateauLen = 10;

	gibbs->emStep = 50;
	gibbs->recipTemp = 1.0;
	gibbs->pseudoweight = 0.1;

	gibbs->phaseShiftFreq = 0.4;
	gibbs->trialIters = 5; //no parameters to set this
	gibbs->fptr = stdout;

	int i = 2;
	int error;
	while(i < argc) {
		if (!strcmp(argv[i],"-s")) {
			i++;
			error = sscanf(argv[i], "%ld", &(gibbs->randseed)); 
			if(error<1) printerr();
		}
		else if (!strcmp(argv[i],"-l")) {
			i++;
			error = sscanf(argv[i], "%d", &(gibbs->span));
			if(error<1) printerr();
		}

		else if (!strcmp(argv[i],"-t")) {
			i++;
			error = sscanf(argv[i], "%d", &(gibbs->numruns));
			if(error < 1) printerr();
		}
		else if (!strcmp(argv[i],"-L")) {
			i++;
			error = sscanf(argv[i], "%d", &(gibbs->iterPlateauLen));
			if(error<1) printerr();
		}

		else if (!strcmp(argv[i],"-ps")) {
			i++;
			error = sscanf(argv[i], "%lf", &(gibbs->phaseShiftFreq));
			if(error<1) printerr();
		}
		else if (!strcmp(argv[i],"-T")) {
			double temperature = 0.0;
			i++;
			error = sscanf(argv[i], "%lf", &(temperature));
			gibbs->recipTemp = 1.0 / temperature;
			if(error<1 || temperature < 0.01) printerr();
		}
		else if (!strcmp(argv[i],"-p")) {
			i++;
			error = sscanf(argv[i], "%lf", &(gibbs->pseudoweight));
			if(error<1) printerr();
		}
		else if (!strcmp(argv[i],"-em")) {
			i++;
			error = sscanf(argv[i], "%d", &(gibbs->emStep));
			if(error<1) printerr();
		}
		else if (!strcmp(argv[i],"-n")) {
			//nothing, doesn't apply for this gibbs finder
		}
		else {
			printerr();
		}
		i++;
	}
	paramBoundCheck(gibbs);

	gibbs->randseed = labs(gibbs->randseed);
	if(gibbs->randseed == 0) {
		gibbs->randseed = 1; //cannot start with seed 0
	}
	sRandom(gibbs->randseed);

	return gibbs;
}


/**
 * Set up the working structures of a Gibbs run once the dataset is loaded.
 *
 * Copies the sequence count from the dataset, creates the run set, allocates
 * the per-position score buffer (one double per position of the longest
 * sequence), builds the EM structure for the configured span, decides
 * whether temperature scaling is active, and resets the iteration counter.
 *
 * Exits the process with status 1 if the score buffer cannot be allocated.
 *
 * @param gibbs  run state; gibbs->data, gibbs->span and gibbs->recipTemp
 *               must already be set
 */
static 
void initGibbsStructs(Gibbs *gibbs) {
	gibbs->numseqs = gibbs->data->numseqs;
	gibbs->runset = createRunSet();

	gibbs->posScore = (double*) malloc(gibbs->data->maxSeqLen * sizeof(double));
	// malloc(0) may legitimately return NULL, so only a failed non-empty
	// request is treated as an error.
	if(gibbs->posScore == NULL && gibbs->data->maxSeqLen > 0) {
		fprintf(stderr, "Out of memory allocating position scores\n");
		exit(1);
	}

	gibbs->ems = makeEmStruct(gibbs->span, gibbs->data);
	// Temperature counts as "on" only when it differs noticeably from 1.0.
	gibbs->useTemperature = (fabs(gibbs->recipTemp - 1.0) > 0.001);

	//tracker - for output only
	gibbs->totalIters = 0;
}
//----------------------------------------------------------------------
// Main
//----------------------------------------------------------------------
/**
 * Program entry point: parse the command line, load the dataset, run the
 * Gibbs sampler, and print the best motif found.
 *
 * @return 0 on normal completion; argument errors exit earlier with
 *         status 1 through printerr()
 */
int main(int argc, char *argv[]) {
	// Build-mode banners.
	if(DEBUG0 || DEBUG2) {
		printf("WARNING: This is currently running under DEBUG mode.\n");
	}
	if(DEBUG1) {
		printf("Verbose mode.\n");
	}
	printf("Compiled on " __DATE__ " " __TIME__ "\n");
	printf("\n");

	// Setup: parameters first, then the dataset, then the structures that
	// depend on both. The order of these calls is significant.
	Gibbs *state = parseArgsGibbs(argc, argv);
	state->data = openDataset(state->fastafile, state->numalphas);
	initPseudocount(state->data, state->pseudoweight);
	initGibbsStructs(state);
	printparams(state, argc, argv);

	// Run the sampler and keep the node it reports as best.
	RunNode *winner = runGibbs(state);

	double itersPerRun = ((double)state->totalIters) / state->numruns;
	printf("Average number of iterations per run: %.2lf\n", itersPerRun);

	double ilr = computeIlrFromCount(winner->countmat, state->data);
	printf("Incomplete Likelihood Ratio: %.2lf\n", ilr);

	printCountmatAndSites(winner->countmat, winner->sites, state->data);

	// nilGibbs is defined elsewhere; presumably it releases the run state.
	nilGibbs(state);
	return 0;
}

