/***
*
* This header declares the helper functions for the Gibbs-sampling algorithm.
*
***/

#ifndef _GIBBS_UTIL_H
#define _GIBBS_UTIL_H

#include "stdinc.h"
#include "profile.h"
#include "dataset.h"
#include "random.h"

// Sentinel site value (-1).
// NOTE(review): presumably stored in a "sites" array entry that currently has
// no site assigned -- confirm against the callers.
// The replacement list is parenthesized so the expansion is always treated as
// a single operand, whatever tokens surround it at the point of use.
#define EMPTY_SITE (-1)

// Selector for the scoring metric.
// NOTE(review): the enumerators presumably correspond to the computeIlr* and
// computeEntropy* functions declared in this header -- confirm at the call sites.
enum ScoreMetric {
	ILR = 0,     // ILR score
	ENTROPY = 1  // entropy score
};

//------------------------------------------------------------
// Structs
//------------------------------------------------------------
// Per-run record that doubles as a singly linked list node (chained via "next";
// see RunSet for the list container).
typedef struct RunNode_el {
	struct RunNode_el *next; //next node in the list
	double score; //score stored for this run; NOTE(review): which metric produced it is not visible here
	int runId; //run ID
	int *sites; //set of sites (one for each position)
	//NOTE(review): addSite/removeSite address this array by "seqind", so it is
	//presumably indexed by sequence and may hold EMPTY_SITE entries -- confirm.

	//These profiles may not be updated to be consistent
	//with the current set of sites. 
	//Hence, use with caution!!
	Profile *countmat; //kept to figure out span and columns
	Profile *pswm; //position specific weight matrix
} RunNode;

//List of RunNode
//(singly linked: nodes are chained through RunNode.next, starting at "head")
typedef struct {
	RunNode *head; //first node of the list
	int len; //NOTE(review): presumably the number of nodes reachable from head -- confirm
} RunSet;


//---------------------------------------
// RunNode/RunSet functions
//---------------------------------------
// Create a RunNode for the run identified by "runId" and return a pointer to it.
// NOTE(review): "initspan"/"maxspan" presumably size the node's profiles for
// "data", and the result is presumably released with nilRunNode() -- confirm
// in the implementation.
extern RunNode* createRunNode(int runId, Dataset *data, int initspan, int maxspan);
// Counterpart of createRunNode().
// NOTE(review): presumably frees "rnode" and the memory it owns; whether it
// also follows "next" is not visible here -- confirm in the implementation.
extern void nilRunNode(RunNode *rnode);

// Create a RunSet and return a pointer to it.
// Declared with (void) so this is a real prototype: before C23 an empty "()"
// leaves the parameters unspecified and lets calls with stray arguments
// compile silently.
// NOTE(review): the initial contents of the set and the ownership of the
// returned pointer are not visible here; presumably released with nilRunSet().
extern RunSet* createRunSet(void);
// Counterpart of createRunSet().
// NOTE(review): presumably frees "rset"; whether the contained nodes are
// freed as well is not visible here -- confirm in the implementation.
extern void nilRunSet(RunSet *rset);

//---------------------------------------
// Scoring functions
//---------------------------------------
//compute ILR of "pswm"
//Returns the score as a double.
//NOTE(review): how "data" enters the computation (e.g. background model) is
//not visible in this header -- confirm in the implementation.
extern double computeIlrFromPswm(Profile *pswm, Dataset *data);

//compute ILR of count-matrix
//Same metric as computeIlrFromPswm(), but the input is the count matrix
//"countmat" rather than a weight matrix. Returns the score as a double.
extern double computeIlrFromCount(Profile *countmat, Dataset *data);

//compute "real" entropy from the motif-finding literature
//Input is the count matrix "countmat"; returns the score as a double.
//NOTE(review): the exact formula and the role of "data" are not visible in
//this header -- confirm in the implementation.
extern double computeEntropyFromCount(Profile *countmat, Dataset *data);

//compute KL-divergence
//Input is the weight matrix "pswm"; returns the score as a double.
//NOTE(review): the function name says "entropy" while the comment says
//KL-divergence (i.e. relative entropy); presumably measured against a
//background distribution taken from "data" -- confirm in the implementation.
extern double computeEntropyFromPswm(Profile *pswm, Dataset *data);

//compute entropy of one single column
//"count" is declared as double[NUMALPHAS]; as a C array parameter it is passed
//as a plain pointer, so the bound documents the expected length but is not
//enforced by the compiler. Returns the column's entropy as a double.
extern double computeEntropyOneColumn(double count[NUMALPHAS], Dataset *data);

//Convert metric to string
//NOTE(review): ownership and mutability of the returned string are not
//visible here; presumably a static string that must not be freed or
//modified -- confirm in the implementation.
extern char* scoreMetricToStr(enum ScoreMetric metric);

//---------------------------------------
// Matrix/profile updates functions
//---------------------------------------
// Update "countmat" from the current "sites".
// NOTE(review): presumably recomputes the counts in place from the sequences
// in "data" at the positions given by "sites" -- confirm in the implementation.
extern void updateCountmatFromSites(Profile *countmat, int *sites, Dataset *data);
// Update "pswm" from the current "sites".
// "usePseudocount" selects whether pseudocounts are used.
// NOTE(review): the pseudocount values and how "data" is used are not visible
// in this header -- confirm in the implementation.
extern void updatePswmFromSites(Profile *pswm, int *sites, 
								Dataset *data, boolean usePseudocount);
// Check "countmat" against "sites"; returns a boolean.
// NOTE(review): presumably true when "countmat" is consistent with the counts
// implied by "sites" in "data" -- confirm the exact criterion in the implementation.
extern boolean validCountmatWithSites(Profile *countmat, int *sites, Dataset *data);


//----------------------------------------------------------------------
// add/remove/set sites functions
//----------------------------------------------------------------------
//set "sites" to have a new set of random starting positions
//"sites" is overwritten in place.
//NOTE(review): "initspan" presumably bounds the start positions so a site of
//that span fits in its sequence -- confirm in the implementation.
extern void setRandomSites(int *sites, int initspan, Dataset *data);

// Add site "newsite" for the sequence with index "seqind".
// NOTE(review): presumably stores it in sites[seqind] and adds its
// contribution to "countmat" -- confirm in the implementation.
extern void addSite(int newsite, int seqind, int *sites, Profile *countmat, Dataset *data);
// Remove the site of the sequence with index "seqind".
// NOTE(review): presumably subtracts its contribution from "countmat" and
// resets sites[seqind] (likely to EMPTY_SITE) -- confirm in the implementation.
extern void removeSite(int seqind, int *sites, Profile *countmat, Dataset *data);
// Find the best sites according to "pswm"; results are written to "sites".
// NOTE(review): presumably picks the best-scoring position in each sequence of
// "data" -- confirm the scoring and tie-breaking in the implementation.
extern void findBestSitesFromPswm(Profile *pswm, int *sites, Dataset *data);

//----------------------------------------------------------------------
// phase-shift
//----------------------------------------------------------------------
// Attempt a phase shift. Both "countmat" and "sites" are passed as non-const
// pointers.
// NOTE(review): presumably tries shifting all sites by a common offset and
// keeps the shift when it scores better, updating both in place -- confirm in
// the implementation.
extern void attemptPhaseShift(Profile *countmat, int *sites, Dataset *data);


#endif

