/* kmeans.c */

#include <float.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Utility functions.  Feel free to use these. */

/* Draw k distinct integers from the range 0..n-1 using rand() and
 * store them in arr[0..k-1] in the order they were drawn.  A draw
 * that repeats an earlier pick is discarded and retried.  If k > n
 * a message is printed and arr is left untouched. */
static void choose(int n, int k, int *arr)
{
    int filled = 0;

    if (k > n) {
        printf("[choose] Error: k > n\n");
        return;
    }

    while (filled < k) {
        int candidate = rand() % n;
        int seen = 0;
        int pos;

        /* Linear scan of the picks accepted so far */
        for (pos = 0; pos < filled && !seen; pos++)
            seen = (arr[pos] == candidate);

        if (!seen)
            arr[filled++] = candidate;
    }
}

/* Widen the first 'dim' bytes of 'v' to doubles, writing them to the
 * matching positions of 'vec' */
static void fill_vector(double *vec, unsigned char *v, int dim)
{
    double *dst = vec;
    unsigned char *src = v;
    int remaining;

    for (remaining = dim; remaining > 0; remaining--) {
        *dst = (double) *src;
        dst++;
        src++;
    }
}

/* Add each of the first 'dim' bytes of 'v' to the corresponding
 * element of the running sum 'acc' */
static void vec_accum(int dim, double *acc, unsigned char *v)
{
    double *sum = acc;
    unsigned char *src = v;
    int remaining;

    for (remaining = dim; remaining > 0; remaining--, sum++, src++)
        *sum += (double) *src;
}

/* Multiply, in place, the first 'dim' elements of 'v' by 'scale' */
static void vec_scale(int dim, double *v, double scale) 
{
    double *elem = v;
    int remaining;

    for (remaining = dim; remaining > 0; remaining--) {
        *elem = *elem * scale;
        elem++;
    }
}

/* Element-wise subtraction: r[i] = a[i] - b[i] for the first 'dim'
 * elements, processed from index 0 upward */
static void vec_diff(int dim, double *a, double *b, double *r)
{
    int idx = 0;

    while (idx < dim) {
        double lhs = a[idx];
        double rhs = b[idx];

        r[idx] = lhs - rhs;
        idx++;
    }
}

/* Return the sum of squares of the first 'dim' elements of 'v'
 * (0.0 when 'dim' is not positive) */
static double vec_normsq(int dim, double *v)
{
    double sum_sq = 0.0;
    int idx = 0;

    while (idx < dim) {
        sum_sq += v[idx] * v[idx];
        idx++;
    }

    return sum_sq;
}

/* Function compute_means.  
 * This function recomputes the means based on the current clustering
 * of the points: each mean becomes the centroid (component-wise
 * average) of the descriptors assigned to it.
 * 
 * Inputs: 
 *   n          : number of input descriptors
 *   dim        : dimension of each input descriptor
 *   k          : number of means
 *   v          : array of pointers to dim-dimensional descriptors
 *   clustering : current assignment of descriptors to means (should
 *                range between 0 and k-1; entries outside that range
 *                are ignored rather than written out of bounds)
 * 
 * Output: 
 *   means_out  : array of output means, concatenated into one long
 *                array of length k*dim (mean c occupies elements
 *                c*dim .. c*dim+dim-1).  A mean with no descriptors
 *                assigned to it is set to all zeros.
 *
 * Return value : 0 on success (including the case where k or dim is
 *                not positive, in which nothing is written); -1 if
 *                the per-cluster count array cannot be allocated, in
 *                which case means_out is left untouched.
 */
double compute_means(int n, int dim, int k, unsigned char **v, 
                     unsigned int *clustering, double *means_out)
{
    int i, j, c;
    int *counts;

    if (k <= 0 || dim <= 0)
        return 0;

    /* The cast is redundant in C but required if this file is built
     * as C++ */
    counts = (int *) calloc((size_t) k, sizeof(int));
    if (counts == NULL)
        return -1;

    for (c = 0; c < k; c++) {
        double *mean = means_out + (size_t) c * dim;

        for (j = 0; j < dim; j++)
            mean[j] = 0.0;
    }

    /* Sum the descriptors of each cluster and count its members */
    for (i = 0; i < n; i++) {
        unsigned int id = clustering[i];
        double *mean;

        if (id >= (unsigned int) k)
            continue;

        mean = means_out + (size_t) id * dim;
        for (j = 0; j < dim; j++)
            mean[j] += (double) v[i][j];

        counts[id]++;
    }

    /* Turn sums into averages; empty clusters keep their zeros */
    for (c = 0; c < k; c++) {
        double *mean = means_out + (size_t) c * dim;

        if (counts[c] == 0)
            continue;

        for (j = 0; j < dim; j++)
            mean[j] /= (double) counts[c];
    }

    free(counts);
    return 0;
}

/* Function compute_error.
 * Evaluate the kmeans energy function of a clustering: the sum, over
 * all descriptors, of the squared Euclidean distance between the
 * descriptor and the mean it is assigned to.
 *
 * Inputs:
 *   n          : number of input descriptors
 *   dim        : dimension of each input descriptor
 *   k          : number of means
 *   v          : array of pointers to dim-dimensional descriptors
 *   means      : current means, stored in a k*dim dimensional array
 *   clustering : assignment of descriptors to means (should range
 *                between 0 and k-1; descriptors with an id outside
 *                that range contribute nothing)
 *
 * Return value : the total squared error (0 if n, dim or k is not
 *                positive)
 */
double compute_error(int n, int dim, int k, unsigned char **v,
                     double *means, unsigned int *clustering)
{
    double total = 0.0;
    int i, j;

    if (n <= 0 || dim <= 0 || k <= 0)
        return 0;

    for (i = 0; i < n; i++) {
        unsigned int id = clustering[i];
        double *mean;

        if (id >= (unsigned int) k)
            continue;

        mean = means + (size_t) id * dim;
        for (j = 0; j < dim; j++) {
            double d = (double) v[i][j] - mean[j];
            total += d * d;
        }
    }

    return total;
}

/* Function compute_clustering.  
 * This function recomputes the clustering based on the current means:
 * every descriptor is assigned to the mean with the smallest squared
 * Euclidean distance.  When several means are equally close the one
 * with the lowest index wins.
 * 
 * Inputs: 
 *   n          : number of input descriptors
 *   dim        : dimension of each input descriptor
 *   k          : number of means
 *   v          : array of pointers to dim-dimensional descriptors
 *   means      : current means, stored in a k*dim dimensional array
 * 
 * Input/Output: 
 *   clustering : on entry, the previous assignment (only compared
 *                against the new one to count changes); on return,
 *                the new assignment of descriptors to nearest means
 *                (ranges between 0 and k-1)
 *
 * Output:
 *   error_out  : total error of the new assignment, i.e. the sum of
 *                squared distances from each descriptor to its mean
 *   
 * Return value : return the number of points that changed assignment.
 *                If n, dim or k is not positive, nothing is assigned,
 *                error_out is set to 0 and 0 is returned.
 */
int compute_clustering(int n, int dim, int k, unsigned char **v,
                       double *means, unsigned int *clustering, 
                       double &error_out)
{
    double total = 0.0;
    int changed = 0;
    int i, j, c;

    error_out = 0.0;
    if (n <= 0 || dim <= 0 || k <= 0)
        return 0;

    for (i = 0; i < n; i++) {
        unsigned int best = 0;
        double best_dist = DBL_MAX;

        for (c = 0; c < k; c++) {
            double *mean = means + (size_t) c * dim;
            double dist = 0.0;

            /* Stop summing as soon as this mean can no longer beat
             * the best one found so far */
            for (j = 0; j < dim && dist < best_dist; j++) {
                double d = (double) v[i][j] - mean[j];
                dist += d * d;
            }

            /* Strict comparison keeps the lowest index on ties */
            if (dist < best_dist) {
                best_dist = dist;
                best = (unsigned int) c;
            }
        }

        if (clustering[i] != best) {
            clustering[i] = best;
            changed++;
        }

        total += best_dist;
    }

    error_out = total;
    return changed;
}

/* Upper bound on assign/update rounds per restart.  The iteration
 * normally stops much earlier, when no point changes cluster; the cap
 * only guards against an assignment that keeps oscillating. */
enum { KMEANS_MAX_ITERATIONS = 1000 };

/* Private helper of kmeans: assign each descriptor to its nearest mean
 * (lowest index on ties), store the summed squared distance in *error
 * and return how many labels changed. */
static int kmeans_assign(int n, int dim, int k, unsigned char **v,
                         const double *means, unsigned int *labels,
                         double *error)
{
    double total = 0.0;
    int changed = 0;
    int i, j, c;

    for (i = 0; i < n; i++) {
        unsigned int best = 0;
        double best_dist = DBL_MAX;

        for (c = 0; c < k; c++) {
            const double *mean = means + (size_t) c * dim;
            double dist = 0.0;

            for (j = 0; j < dim; j++) {
                double d = (double) v[i][j] - mean[j];
                dist += d * d;
            }

            if (dist < best_dist) {
                best_dist = dist;
                best = (unsigned int) c;
            }
        }

        if (labels[i] != best) {
            labels[i] = best;
            changed++;
        }

        total += best_dist;
    }

    *error = total;
    return changed;
}

/* Private helper of kmeans: replace every mean that has at least one
 * descriptor by the centroid of its descriptors.  A mean with no
 * descriptors keeps its previous value.  'labels' must be in 0..k-1;
 * 'counts' is caller-provided scratch space of length k. */
static void kmeans_update(int n, int dim, int k, unsigned char **v,
                          const unsigned int *labels, double *means,
                          int *counts)
{
    int i, j, c;

    for (c = 0; c < k; c++)
        counts[c] = 0;

    for (i = 0; i < n; i++)
        counts[labels[i]]++;

    /* Clear only the means that are about to be recomputed */
    for (c = 0; c < k; c++) {
        double *mean = means + (size_t) c * dim;

        if (counts[c] == 0)
            continue;

        for (j = 0; j < dim; j++)
            mean[j] = 0.0;
    }

    for (i = 0; i < n; i++) {
        double *mean = means + (size_t) labels[i] * dim;

        for (j = 0; j < dim; j++)
            mean[j] += (double) v[i][j];
    }

    for (c = 0; c < k; c++) {
        double *mean = means + (size_t) c * dim;

        if (counts[c] == 0)
            continue;

        for (j = 0; j < dim; j++)
            mean[j] /= (double) counts[c];
    }
}

/* Function kmeans.  
 * Run kmeans clustering on a set of input descriptors.  Each restart
 * picks k distinct descriptors at random (using rand(); seeding with
 * srand() is left to the caller) as initial means, then alternates
 * nearest-mean assignment and centroid update until no descriptor
 * changes cluster.  The restart with the lowest total squared error
 * is written to the outputs.
 * 
 * Inputs: 
 *   n          : number of input descriptors
 *   dim        : dimension of each input descriptor
 *   k          : number of means to compute (must not exceed n)
 *   restarts   : number of random restarts to perform; values below 1
 *                are treated as 1
 *   v          : array of pointers to dim-dimensional descriptors
 * 
 * Output: 
 *   means      : array of output means.  The means are
 *                concatenated into one long array of length k*dim.
 *   clustering : assignment of descriptors to means (ranges
 *                between 0 and k-1), stored as an array of
 *                length n.  clustering[i] contains the
 *                cluster ID for point i
 *
 * Return value : total squared error of the returned clustering, or
 *                -1 if the arguments are invalid (n, dim or k not
 *                positive, or k > n) or memory runs out; in that case
 *                the outputs are not written.
 */
double kmeans(int n, int dim, int k, int restarts, unsigned char **v, 
              double *means, unsigned int *clustering)
{
    double best_error = DBL_MAX;
    double *cur_means;
    unsigned int *cur_labels;
    int *order;
    int *counts;
    size_t mean_len;
    int run, i, j;

    if (n <= 0 || dim <= 0 || k <= 0 || k > n) {
        printf("[kmeans] Error: need n, dim, k > 0 and k <= n\n");
        return -1;
    }

    if (restarts < 1)
        restarts = 1;

    mean_len = (size_t) k * (size_t) dim;

    /* The casts are redundant in C but required if this file is built
     * as C++ */
    cur_means = (double *) malloc(mean_len * sizeof(double));
    cur_labels = (unsigned int *) malloc((size_t) n * sizeof(unsigned int));
    order = (int *) malloc((size_t) n * sizeof(int));
    counts = (int *) malloc((size_t) k * sizeof(int));

    if (cur_means == NULL || cur_labels == NULL || 
        order == NULL || counts == NULL) {
        printf("[kmeans] Error: out of memory\n");
        free(cur_means);
        free(cur_labels);
        free(order);
        free(counts);
        return -1;
    }

    for (i = 0; i < n; i++)
        order[i] = i;

    for (run = 0; run < restarts; run++) {
        double error = 0.0;
        int changed;
        int iter;

        /* Partial Fisher-Yates shuffle: after step i, order[0..i]
         * holds i+1 distinct descriptor indices.  'order' stays a
         * permutation, so it needs no reset between restarts. */
        for (i = 0; i < k; i++) {
            int pick = i + rand() % (n - i);
            int tmp = order[i];

            order[i] = order[pick];
            order[pick] = tmp;

            for (j = 0; j < dim; j++)
                cur_means[(size_t) i * dim + j] = (double) v[order[i]][j];
        }

        /* Start from an out-of-range label so that the first
         * assignment counts every point as changed */
        for (i = 0; i < n; i++)
            cur_labels[i] = (unsigned int) k;

        changed = kmeans_assign(n, dim, k, v, cur_means, cur_labels, &error);

        for (iter = 0; changed > 0 && iter < KMEANS_MAX_ITERATIONS; iter++) {
            kmeans_update(n, dim, k, v, cur_labels, cur_means, counts);
            changed = kmeans_assign(n, dim, k, v, cur_means, cur_labels, 
                                    &error);
        }

        if (error < best_error) {
            best_error = error;
            memcpy(means, cur_means, mean_len * sizeof(double));
            memcpy(clustering, cur_labels, (size_t) n * sizeof(unsigned int));
        }
    }

    free(cur_means);
    free(cur_labels);
    free(order);
    free(counts);

    return best_error;
}
