/* importance sampling kernel svm */
#include <stdio.h>
#include <assert.h>
#include "svm_common.h"
#include "./SFMT-src-1.3.3/SFMT.h"


/* A constraint counts as idle in an iteration when its alpha is below
   ALPHA_THRESHOLD*C (see the cleanup bookkeeping in main). */
#define ALPHA_THRESHOLD 1E-10
/* resize_cleanup() drops every constraint whose idle counter has reached
   this many consecutive idle iterations. */
#define IDLE_ITER 20
/* main runs resize_cleanup() whenever the iteration count is a multiple
   of this value. */
#define CLEANUP_CHECK 50
/* Upper bound on the number of sampling attempts made per iteration while
   looking for a sampled constraint that is violated. */
#define MAX_SAMPLE_ITER 1000

/* mosek interface (defined outside this file). main calls it as
   mosek_qp_optimize(G_sample, delta, alpha, size_active, C) and reads the
   updated alpha afterwards; the meaning of the int result is not visible
   here -- TODO confirm against the Mosek wrapper. */
int mosek_qp_optimize(double**, double*, double*, long, double);

/* Parse the command line into the file-name buffers and parameter structs. */
void my_read_input_parameters(int argc, char* argv[], char *trainfile, char *testfile, char *modelfile,
			      LEARN_PARM *learn_parm, KERNEL_PARM *kernel_parm);

/* Print a "(more)" prompt and wait for one character on stdin. */
void my_wait_any_key();

/* Compact the active constraint set by removing constraints that have been
   idle for IDLE_ITER iterations; returns the new number of constraints. */
int resize_cleanup(int size_active, int m, long SAMPLE_SIZE, int *idle, 
		   double *alpha, double *delta, float **A_sample, double **G_sample,
		   long **all_sample_idx, long *all_size_c);
/* Allocate and fill in a MODEL with room for num_sv support vectors. */
void init_model(MODEL* model, int num_sv, long totdoc, long totwords, KERNEL_PARM *kernel_parm);

/*
 * Evaluate the kernel selected by kernel_parm->kernel_type on two documents.
 * Only the first feature vector of each document (a->fvec, b->fvec) is used,
 * i.e. each DOC is assumed to carry a single svector.
 *
 *   0: linear      <a,b>
 *   1: polynomial  (coef_lin*<a,b> + coef_const)^poly_degree
 *   2: RBF         exp(-rbf_gamma * (|a|^2 - 2<a,b> + |b|^2)), with the squared
 *                  norms read from the precomputed twonorm_sq fields
 *
 * Any other kernel_type prints a message and terminates the process with
 * exit status 0.
 */
double my_kernel(KERNEL_PARM *kernel_parm, DOC *a, DOC *b) {
  if (kernel_parm->kernel_type == 0) {
    /* linear kernel */
    return (double) sprod_ss(a->fvec, b->fvec);
  }

  if (kernel_parm->kernel_type == 1) {
    /* polynomial kernel */
    return (double) pow(kernel_parm->coef_lin*sprod_ss(a->fvec, b->fvec)+kernel_parm->coef_const,
                        (double) kernel_parm->poly_degree);
  }

  if (kernel_parm->kernel_type == 2) {
    /* RBF kernel; gamma comes from kernel_parm->rbf_gamma */
    return (double) exp(-kernel_parm->rbf_gamma
                        *(a->fvec->twonorm_sq-2*sprod_ss(a->fvec, b->fvec)+b->fvec->twonorm_sq));
  }

  printf("Unrecognized kernel_type %ld!\n", kernel_parm->kernel_type);
  exit(0);
}

/*
 * Train an importance-sampling kernel SVM, evaluate it and save the model.
 *
 * Usage (see my_read_input_parameters): [options] trainfile [testfile [modelfile]]
 *
 * Training is a cutting-plane loop.  Each iteration
 *   1. draws SAMPLE_SIZE indices uniformly (with replacement) from the
 *      examples whose margin 1 - y_i*f(x_i) is currently positive,
 *   2. turns the sample into one new constraint (a column of A_sample, a row
 *      and column of the Gram matrix G_sample, and a right-hand side delta),
 *   3. re-solves the QP over all active constraints with mosek_qp_optimize,
 *   4. recomputes xi, the predictions Xw and the mean margin loss.
 * The loop stops once the mean margin loss is within epsilon of xi or after
 * MAX_ITER iterations.  Every CLEANUP_CHECK iterations, constraints that have
 * been idle for a long time are removed by resize_cleanup.
 *
 * After training, the per-constraint multipliers are folded into one
 * coefficient per training example (beta).  If a test file was given, the
 * test error is reported; if a model file was given, the model is written.
 *
 * Returns 0.  A failure to sample a violated constraint terminates the
 * process with exit status 1.
 */
int main(int argc, char* argv[]) {
  DOC **X;
  double *Y;
  long totwords, totdoc, test_totwords, test_totdoc;
  float **A_sample, *Acol_sample;
  double **G_sample, *G_sample_col;
  double *Xw;
  double *alpha;
  long *sample_idx;
  int *c;
  long *ind, end_ind;
  double value, xi;
  double *delta;
  int iter;
  long i,j,m, size_c;
  double temp;
  double C, epsilon;
  int r;
  long temp_idx;
  long train_error;
  LEARN_PARM learn_parm;
  KERNEL_PARM  kernel_parm;
  char trainfile[1024];
  char testfile[1024];
  char modelfile[1024];
  long MAX_ITER;
  long SAMPLE_SIZE;
  /* store sample indices for kernel expansion during testing */
  long **all_sample_idx;
  long *all_size_c;
  double *beta;
  DOC **test_X;
  double *test_Y;
  double *test_Xw;
  long test_error;
  long num_sv;
  long runtime_start, runtime_end;
  int *idle; /* for cleanup */
  int size_active;
  long *sv_idx;
  /* margin loss */
  double *margin_loss;
  double violation;
  long sample_iter;
  MODEL *model=(MODEL *)my_malloc(sizeof(MODEL));

  /* The test and model file names are optional on the command line.  Mark
     them as absent up front so that they are never read uninitialised. */
  testfile[0] = '\0';
  modelfile[0] = '\0';

  /* read input parameters */
  my_read_input_parameters(argc, argv, trainfile, testfile, modelfile, &learn_parm, &kernel_parm);

  /* need to change these as input parameters later */
  epsilon = learn_parm.eps;
  C = learn_parm.svm_c;
  MAX_ITER = learn_parm.maxiter;
  SAMPLE_SIZE = learn_parm.svm_maxqpsize;
  /* DEBUG */
  printf("epsilon: %.8f\n", epsilon);
  printf("C: %.4f\n", C);
  printf("MAX_ITER: %ld\n", MAX_ITER);
  printf("SAMPLE_SIZE: %ld\n", SAMPLE_SIZE);
  printf("Training set: %s\n", trainfile);
  printf("Test set: %s\n", testfile);
  printf("Kernel type: %ld\n", kernel_parm.kernel_type);
  fflush(stdout);
  /* DEBUG */

  /* input in svm^light format */
  read_documents(trainfile, &X, &Y, &totwords, &totdoc);

  /* start timer, exclude I/O */
  runtime_start = get_runtime();

  /* initialization */
  m = totdoc;
  xi = 0.0;
  A_sample = NULL;
  G_sample = NULL;
  G_sample_col = NULL;
  delta = NULL;
  alpha = NULL;
  all_sample_idx = NULL;
  all_size_c = NULL;
  sample_idx = (long*) my_malloc(sizeof(long)*SAMPLE_SIZE);
  c = (int*) my_malloc(sizeof(int)*m);
  Xw = (double*) my_malloc(sizeof(double)*m);
  Acol_sample = (float*) my_malloc(sizeof(float)*m);
  ind = (long*) my_malloc(sizeof(long)*m);
  beta = (double*) my_malloc(sizeof(double)*m);
  sv_idx = (long*) my_malloc(sizeof(long)*m);
  test_X = NULL;
  test_Y = NULL;
  test_Xw = NULL;
  test_totdoc = 0;
  idle = NULL;
  margin_loss = (double*) my_malloc(sizeof(double)*m);

  /* initialize the RNG; the seed is carried in learn_parm.biased_hyperplane */
  init_gen_rand(learn_parm.biased_hyperplane);

  /* first constraint: start from the all-zero predictor */
  for (i=0;i<m;i++) {
    Xw[i] = 0.0;
  }
  /* compute constraint c: collect the examples with positive margin loss
     in ind[0..end_ind-1] and accumulate the mean margin loss in value */
  value = 0.0;
  size_c = 0;
  end_ind = 0;
  for (i=0;i<m;i++) {
    if (1-Y[i]*Xw[i]>0) {
      c[i] = 1;
      ind[end_ind] = i;
      end_ind++;
      value = value + 1 -Y[i]*Xw[i];
      margin_loss[i] = 1 - Y[i]*Xw[i];
    } else {
      c[i] = 0;
    }
  }
  size_c = end_ind;
  value /= m;


  iter = 0;
  size_active = 0;
  while ((value>xi+epsilon)&&(iter<MAX_ITER)) {
    /* update iter */
    iter+=1;
    size_active+=1;

    /* grow the per-constraint arrays by one slot */
    delta = (double*) realloc(delta, sizeof(double)*size_active);
    assert(delta!=NULL);
    delta[size_active-1] = ((double) size_c)/m;
    G_sample_col = (double*) realloc(G_sample_col, sizeof(double)*size_active);
    assert(G_sample_col!=NULL);
    alpha = (double*) realloc(alpha, sizeof(double)*size_active);
    assert(alpha!=NULL);
    alpha[size_active-1] = 0.0;
    /* update cleanup information */
    idle = (int*) realloc(idle, sizeof(int)*size_active);
    assert(idle!=NULL);
    idle[size_active-1] = 0;

    /* compute sample_idx: draw from the violated examples, and redraw (up
       to MAX_SAMPLE_ITER attempts in total) while the sampled constraint
       is not violated */
    sample_iter = 0;
    do {
      violation = 0.0;
      for (i=0;i<SAMPLE_SIZE;i++) {
        temp_idx = (long) floor(genrand_res53()*end_ind);
        sample_idx[i] = ind[temp_idx];
        violation += margin_loss[ind[temp_idx]];
      }
      sample_iter++;
    } while ((violation/SAMPLE_SIZE*size_c/m<=xi+epsilon)&&(sample_iter<MAX_SAMPLE_ITER));
    /* Report an error only if the last attempt is still not violated.
       Testing the attempt counter alone would also reject a sample that
       succeeded on the final attempt.  This is not an errno-style failure,
       so the message goes to stderr directly instead of through perror. */
    if (violation/SAMPLE_SIZE*size_c/m<=xi+epsilon) {
      fprintf(stderr, "Max iteration exceeded in constraint sampling!\n");
      exit(1);
    }

    /* copy and update all_sample_idx */
    all_sample_idx = (long**) realloc(all_sample_idx, sizeof(long*)*size_active);
    assert(all_sample_idx!=NULL);
    all_sample_idx[size_active-1] = my_malloc(sizeof(long)*SAMPLE_SIZE);
    for (i=0;i<SAMPLE_SIZE;i++) {
      all_sample_idx[size_active-1][i] = sample_idx[i];
    }
    all_size_c = (long*) realloc(all_size_c, sizeof(long)*size_active);
    assert(all_size_c!=NULL);
    all_size_c[size_active-1] = size_c;


    /* compute Acol_sample: for every training example, the label-weighted
       kernel sum against the sample, scaled by size_c/(SAMPLE_SIZE*m) */
    for (i=0;i<m;i++) {
      Acol_sample[i] = 0.0;
      for (j=0;j<SAMPLE_SIZE;j++) {
        Acol_sample[i]+= (float) (Y[sample_idx[j]]*my_kernel(&kernel_parm,X[i],X[sample_idx[j]]));
      }
      Acol_sample[i] = Acol_sample[i]/SAMPLE_SIZE*size_c/m;
    }
    A_sample = (float**) realloc(A_sample, sizeof(float*)*size_active);
    assert(A_sample!=NULL);
    A_sample[size_active-1] = my_malloc(sizeof(float)*m);
    for (i=0;i<m;i++) {
      A_sample[size_active-1][i] = Acol_sample[i];
    }

    /* new Gram column: inner product of the new constraint with every
       active constraint, evaluated on the sampled indices */
    for (i=0;i<size_active;i++) {
      G_sample_col[i] = 0.0;
      for (j=0;j<SAMPLE_SIZE;j++) {
        G_sample_col[i]+=((double) A_sample[i][sample_idx[j]])*Y[sample_idx[j]];
      }
      G_sample_col[i] = G_sample_col[i]/SAMPLE_SIZE*size_c/m;
    }

    /* grow G_sample to size_active x size_active; the new last row starts
       as NULL so that realloc allocates it */
    G_sample = realloc(G_sample, sizeof(double*)*size_active);
    assert(G_sample!=NULL);
    G_sample[size_active-1] = NULL;
    for (i=0;i<size_active;i++) {
      G_sample[i] = realloc(G_sample[i], sizeof(double)*size_active);
      assert(G_sample[i]!=NULL);
    }
    for (i=0;i<size_active-1;i++) {
      G_sample[size_active-1][i] = G_sample_col[i];
      G_sample[i][size_active-1] = G_sample_col[i];
    }
    G_sample[size_active-1][size_active-1] = G_sample_col[size_active-1];


    /* solve QP to update alpha */
    /* NOTE(review): the returned status is stored but not inspected; its
       meaning is defined by the Mosek wrapper outside this file. */
    r = mosek_qp_optimize(G_sample, delta, alpha, (long) size_active, C);
    (void) r;

    /* update xi: largest slack delta[i] - (G*alpha)[i], floored at zero */
    xi = 0.0;
    for (i=0;i<size_active;i++) {
      temp = 0.0;
      for (j=0;j<size_active;j++) {
        temp+=G_sample[i][j]*alpha[j];
      }
      if (delta[i]-temp>xi) {
        xi = delta[i] - temp;
      }
    }

    /* update cleanup information */
    for (i=0;i<size_active;i++) {
      if (alpha[i]<ALPHA_THRESHOLD*C) {
        idle[i]++;
      } else {
        idle[i]=0;
      }
    }

    /* compute Xw */
    for (i=0;i<m;i++) {
      Xw[i] = 0.0;
      for (j=0;j<size_active;j++) {
        Xw[i] += ((double) A_sample[j][i])*alpha[j];
      }
    }
    /* compute constraint c */
    value = 0.0;
    size_c = 0;
    end_ind = 0;
    train_error = 0;
    for (i=0;i<m;i++) {
      if (1-Y[i]*Xw[i]>0) {
        c[i] = 1;
        ind[end_ind] = i;
        end_ind++;
        value = value + 1 -Y[i]*Xw[i];
        margin_loss[i] = 1 - Y[i]*Xw[i];
      } else {
        c[i] = 0;
      }
      if (Y[i]*Xw[i]<0) train_error++;
    }
    size_c = end_ind;
    value /= m;

    /* print iteration */
    printf("Iteration %d\n", iter);

    /* print out iteration statistics */
    printf("Margin Loss: %.8g\n", value);
    printf("Training Error: %.4f\n", ((double) train_error)/m);
    printf("Violation: %.8g\n", value - xi);

    /* cleanup */
    if (iter % CLEANUP_CHECK==0) {
      size_active = resize_cleanup(size_active, m, SAMPLE_SIZE, idle, alpha, delta, A_sample, G_sample, all_sample_idx, all_size_c);
    }

  } /* end while */


  /* compute training set error */
  train_error = 0;
  for (i=0;i<m;i++) {
    if (Y[i]*Xw[i]<0) train_error++;
  }
  printf("Training set error rate: %.4f\n", ((double) train_error)/m);
  fflush(stdout);

  /* end timer */
  runtime_end = get_runtime();

  printf("Training time in cpu seconds (excluding I/O): %.2f\n", ((float) runtime_end - (float) runtime_start)/100.0);
  fflush(stdout);

  /* fold the constraint multipliers into one coefficient per training
     example; examples with a coefficient above 1E-8 become support vectors */
  for (i=0;i<m;i++) {
    beta[i] = 0.0;
  }
  for (i=0;i<size_active;i++) {
    for (j=0;j<SAMPLE_SIZE;j++) {
      beta[all_sample_idx[i][j]]+=alpha[i]*all_size_c[i]/SAMPLE_SIZE;
    }
  }
  num_sv = 0;
  for (i=0;i<m;i++) {
    beta[i]/=m;
    if (beta[i]>1E-8) {
      sv_idx[num_sv] = i;
      num_sv++;
    }
  }


  /* test on the test set (only when a test file was given) */
  if (testfile[0]!='\0') {
    read_documents(testfile, &test_X, &test_Y, &test_totwords, &test_totdoc);
    test_Xw = (double*) my_malloc(sizeof(double)*test_totdoc);

    /* now classify */
    test_error = 0;
    for (i=0;i<test_totdoc;i++) {
      test_Xw[i]=0.0;
      for (j=0;j<num_sv;j++) {
        test_Xw[i]+=Y[sv_idx[j]]*beta[sv_idx[j]]*my_kernel(&kernel_parm,X[sv_idx[j]],test_X[i]);
      }
      if (test_Xw[i]*test_Y[i]<0) test_error++;
    }
    printf("Test set error rate: %.4f\n", ((double) test_error)/test_totdoc);
  }


  /* write to model file (only when a model file was given) */
  if (modelfile[0]!='\0') {
    init_model(model,num_sv,totdoc,totwords,&kernel_parm);
    for (i=0;i<num_sv;i++) {
      model->supvec[i+1] = X[sv_idx[i]];
      model->alpha[i+1] = Y[sv_idx[i]]*beta[sv_idx[i]];
    }
    write_model(modelfile, model);
    /* release the arrays allocated by init_model; the documents that
       supvec points to belong to X and are freed below */
    free(model->supvec);
    free(model->alpha);
  }
  free(model);


  /* free memory */
  for (i=0;i<size_active;i++) {
    free(A_sample[i]);
    free(G_sample[i]);
  }
  free(A_sample);
  free(G_sample);
  free(G_sample_col);
  free(delta);
  free(alpha);
  free(sample_idx);
  free(c);
  free(Xw);
  free(Acol_sample);
  free(ind);
  free(idle);
  free(margin_loss);

  /* free examples and labels */
  for (i=0;i<m;i++) {
    free_example(X[i],1);
  }
  free(X);
  free(Y);

  /* free test set (all no-ops when no test file was read) */
  for (i=0;i<test_totdoc;i++) {
    free_example(test_X[i],1);
  }
  free(test_X);
  free(test_Y);
  free(test_Xw);
  free(beta);
  for (i=0;i<size_active;i++) {
    free(all_sample_idx[i]);
  }
  free(all_sample_idx);
  free(all_size_c);
  free(sv_idx);

  if (iter==MAX_ITER) {
    printf("Maximum number of iterations exceeded!\n"); fflush(stdout);
  }

  return(0);

}


/*
 * Parse the command line:  [options] trainfile [testfile [modelfile]]
 *
 * Options (each takes one value):
 *   -c  svm_c                  (default 100.0)
 *   -e  eps                    (default 0.001)
 *   -s  sample size, stored in svm_maxqpsize     (default 100)
 *   -g  rbf_gamma              (default 0.05)
 *   -d  poly_degree            (default 3)
 *   -r  random seed, stored in biased_hyperplane (default 12345)
 *   -t  kernel_type            (default 0)
 *   -n  maxiter                (default 300)
 *
 * trainfile is mandatory.  testfile and modelfile are set to the empty
 * string when they are not present on the command line.
 *
 * The three file-name buffers must be non-NULL and hold at least 1024 bytes
 * (the size main declares); longer names are rejected instead of overflowing
 * the buffers.
 *
 * On a usage error the function prints a message and terminates the
 * process; it does not return an error to the caller.
 */
void my_read_input_parameters(int argc, char *argv[], char *trainfile, char *testfile, char *modelfile,
			   LEARN_PARM *learn_parm, KERNEL_PARM *kernel_parm) {

  enum { FILE_NAME_BUF = 1024 }; /* capacity of the caller's name buffers */
  long i, k;
  char option;
  const char *value;

  /* set default */
  learn_parm->maxiter=300;
  learn_parm->svm_maxqpsize=100;
  learn_parm->svm_c=100.0;
  learn_parm->eps=0.001;
  learn_parm->biased_hyperplane=12345; /* store random seed */
  kernel_parm->kernel_type=0;
  kernel_parm->rbf_gamma=0.05;
  kernel_parm->coef_lin=1;
  kernel_parm->coef_const=1;
  kernel_parm->poly_degree=3;
  strcpy(kernel_parm->custom,"empty");

  /* optional file names default to "not given" */
  testfile[0] = '\0';
  modelfile[0] = '\0';

  for(i=1;(i<argc) && ((argv[i])[0] == '-');i++) {
    option = (argv[i])[1];

    /* reject unknown options before looking for their value */
    switch (option) {
    case 'c': case 'e': case 's': case 'g':
    case 'd': case 'r': case 't': case 'n':
      break;
    default: printf("\nUnrecognized option %s!\n\n",argv[i]);
      exit(0);
    }

    /* every option needs a value; without this check an option given as
       the last argument would make atof/atol read argv[argc], i.e. NULL */
    if (i+1>=argc) {
      printf("\nMissing value for option %s!\n\n",argv[i]);
      exit(1);
    }
    i++;
    value = argv[i];

    switch (option) {
    case 'c': learn_parm->svm_c=atof(value); break;
    case 'e': learn_parm->eps=atof(value); break;
    case 's': learn_parm->svm_maxqpsize=atol(value); break; /* use svm_maxqpsize to store SAMPLE_SIZE */
    case 'g': kernel_parm->rbf_gamma=atof(value); break;
    case 'd': kernel_parm->poly_degree=atol(value); break;
    case 'r': learn_parm->biased_hyperplane=atol(value); break; /* random seed */
    case 't': kernel_parm->kernel_type=atol(value); break;
    case 'n': learn_parm->maxiter=atol(value); break;
    default: break; /* unreachable: filtered above */
    }
  }

  if(i>=argc) {
    printf("\nNot enough input parameters!\n\n");
    my_wait_any_key();
    exit(0);
  }

  /* make sure none of the (up to three) file names overflows its buffer */
  for (k=i;(k<argc)&&(k<i+3);k++) {
    if (strlen(argv[k])>=FILE_NAME_BUF) {
      printf("\nFile name too long: %s\n\n",argv[k]);
      exit(1);
    }
  }

  strcpy (trainfile, argv[i]);

  if((i+1)<argc) {
    strcpy (testfile, argv[i+1]);
  }
  if((i+2)<argc) {
    strcpy (modelfile, argv[i+2]);
  }

}

/* Show a "(more)" prompt on stdout, then block until one character (or
   end-of-file) is read from stdin.  The character read is discarded. */
void my_wait_any_key()
{
  fputs("\n(more)\n", stdout);
  (void)fgetc(stdin);
}

/*
 * Remove idle constraints from the active set, compacting in place.
 *
 * A constraint i is dropped when idle[i] >= IDLE_ITER.  The surviving
 * constraints keep their relative order and are moved to the front of
 * alpha, delta, idle, all_size_c, the rows of A_sample (m entries each),
 * the rows of all_sample_idx (SAMPLE_SIZE entries each) and both the rows
 * and the columns of G_sample.
 *
 * Memory: the row buffers of dropped slots (A_sample[k], G_sample[k],
 * all_sample_idx[k] for k >= the returned size) are freed and their slots
 * set to NULL.  The surviving G_sample rows are shrunk to the new size.
 * The outer arrays themselves are deliberately NOT reallocated: they are
 * received by value, so the caller could not learn a new address, and a
 * moved block (or a zero-size realloc when nothing survives) would leave
 * the caller with dangling pointers.  The caller keeps ownership of them;
 * entries past the returned size are simply unused.
 *
 * Returns the number of surviving constraints.
 */
int resize_cleanup(int size_active, int m, long SAMPLE_SIZE, int *idle, 
		   double *alpha, double *delta, float **A_sample, double **G_sample,
		   long **all_sample_idx, long *all_size_c) {
  int i,j,new_size_active;
  long k;
  double *shrunk_row;

  /* first pass: compact the per-constraint scalars and rows.
     i is the next free slot, j the next surviving constraint. */
  i=0;
  while ((i<size_active)&&(idle[i]<IDLE_ITER)) i++;
  j=i;
  while ((j<size_active)&&(idle[j]>=IDLE_ITER)) j++;

  while (j<size_active) {
    /* copying */
    alpha[i] = alpha[j];
    delta[i] = delta[j];
    all_size_c[i] = all_size_c[j];
    for (k=0;k<m;k++) {
      A_sample[i][k] = A_sample[j][k];
    }
    /* rows still have the old width here; columns are compacted below */
    for (k=0;k<size_active;k++) {
      G_sample[i][k] = G_sample[j][k];
    }
    for (k=0;k<SAMPLE_SIZE;k++) {
      all_sample_idx[i][k] = all_sample_idx[j][k];
    }
    i++;
    j++;
    while ((j<size_active)&&(idle[j]>=IDLE_ITER)) j++;
  }
  new_size_active = i;

  /* release the row buffers of the slots that are no longer in use */
  for (k=new_size_active;k<size_active;k++) {
    free(A_sample[k]);
    A_sample[k] = NULL;
    free(G_sample[k]);
    G_sample[k] = NULL;
    free(all_sample_idx[k]);
    all_sample_idx[k] = NULL;
  }

  /* second pass for idle and G_sample: compact idle[] and the columns of
     the surviving G_sample rows.  idle[] is read ahead of where it is
     written (i < j), so the keep/drop decisions match the first pass. */
  i=0;
  while ((i<size_active)&&(idle[i]<IDLE_ITER)) i++;
  j=i;
  while ((j<size_active)&&(idle[j]>=IDLE_ITER)) j++;

  while (j<size_active) {
    idle[i] = idle[j];
    for (k=0;k<new_size_active;k++) {
      G_sample[k][i] = G_sample[k][j];
    }
    i++;
    j++;
    while ((j<size_active)&&(idle[j]>=IDLE_ITER)) j++;
  }

  /* shrink the surviving rows; the new address is stored back into the
     array, so the caller sees it.  On failure the old (larger) row is kept. */
  for (k=0;k<new_size_active;k++) {
    shrunk_row = (double*) realloc(G_sample[k], sizeof(double)*new_size_active);
    if (shrunk_row!=NULL) {
      G_sample[k] = shrunk_row;
    }
  }
  return(new_size_active);
}


/*
 * Prepare *model for num_sv support vectors.
 *
 * Allocates supvec and alpha with num_sv+2 entries each, clears the
 * reserved entry 0 of both, and sets sv_num to num_sv+1.  b and
 * at_upper_bound are set to 0, lin_weights to NULL, and the leave-one-out
 * and xi/alpha estimates to -1.  totdoc, totwords and a copy of
 * *kernel_parm are stored in the model.  Entries 1..num_sv of supvec and
 * alpha are left for the caller to fill in.
 */
void init_model(MODEL* model, int num_sv, long totdoc, long totwords, KERNEL_PARM *kernel_parm) {

  /* storage; element 0 is reserved and empty for now */
  model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(num_sv+2));
  model->alpha = (double *)my_malloc(sizeof(double)*(num_sv+2));
  model->supvec[0]=NULL;
  model->alpha[0]=0;
  model->sv_num=num_sv+1;

  /* problem description */
  model->kernel_parm=(*kernel_parm);
  model->totdoc=totdoc;
  model->totwords=totwords;

  /* hyperplane state */
  model->b=0;
  model->at_upper_bound=0;
  model->lin_weights=NULL;

  /* performance estimates are not computed here */
  model->loo_error=-1;
  model->loo_recall=-1;
  model->loo_precision=-1;
  model->xa_error=-1;
  model->xa_recall=-1;
  model->xa_precision=-1;

}
