#include "CNMap.h"
#include "LogisticEmbed_common.h"
#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>


/*
 * Release all heap memory owned by a CNMap: the per-cluster node lists,
 * then the three top-level arrays.
 *
 * The struct is received by value, so the caller's copy keeps its (now
 * dangling) pointers and must not be used or freed again.
 */
void free_CNMap(CNMap cnm)
{
	/* Inner arrays first; the order among them does not matter. */
	int cluster = cnm.num_clusters;
	while(cluster-- > 0)
	{
		free(cnm.cluster_node_map[cluster]);
	}

	free(cnm.num_nodes_for_each_cluster);
	free(cnm.cluster_node_map);
	free(cnm.node_cluster_map);
}

/*
 * Build a CNMap for the case where there are more nodes (processes) than
 * clusters, so that every cluster is served by one or more nodes.
 *
 * Each cluster i is weighted by cluster_size_array[i] * nnz_array[i]
 * (an earlier variant used the squared cluster size) and rr_assign()
 * decides how many nodes it receives. Node ids are then handed out
 * consecutively: cluster 0 takes the first block of ids, cluster 1 the
 * next block, and so on.
 *
 * num_clusters       number of clusters; must be > 0
 * num_nodes          number of nodes; must be > num_clusters
 * cluster_size_array num_clusters entries, read only
 * nnz_array          num_clusters entries, read only
 *
 * Returns a CNMap whose arrays are heap-allocated; release it with
 * free_CNMap(). Prints a message and calls exit(1) on invalid counts or
 * when memory cannot be allocated.
 */
CNMap create_CNmap(int num_clusters, int num_nodes, int* cluster_size_array, int* nnz_array)
{
	if(num_nodes <= num_clusters)
	{
		printf("Number of processes should be greater than the number of clusters.\n");
		exit(1);
	}
	/* Without this, rr_assign() would be asked to share items among zero recipients. */
	if(num_clusters <= 0)
	{
		printf("Number of clusters should be positive.\n");
		exit(1);
	}

	CNMap cnm;
	cnm.num_clusters = num_clusters;
	cnm.num_nodes = num_nodes;
	cnm.node_cluster_map = (int*)malloc(num_nodes * sizeof(int));
	cnm.cluster_node_map = (int**)malloc(num_clusters * sizeof(int*));
	cnm.num_nodes_for_each_cluster = (int*)calloc(num_clusters, sizeof(int));
	/* Heap rather than a variable-length array: the size is caller-controlled. */
	double* scores = (double*)malloc(num_clusters * sizeof(double));
	if(cnm.node_cluster_map == NULL || cnm.cluster_node_map == NULL
		|| cnm.num_nodes_for_each_cluster == NULL || scores == NULL)
	{
		printf("Out of memory while creating the cluster-node map.\n");
		exit(1);
	}

	int i;
	for(i = 0; i < num_clusters; i++)
		scores[i] = (double)cluster_size_array[i] * (double)nnz_array[i];
	rr_assign(num_clusters, num_nodes, scores, cnm.num_nodes_for_each_cluster);
	free(scores);

	int current_node_idx = 0;
	int j;
	for(i = 0; i < num_clusters; i++)
	{
		int count = cnm.num_nodes_for_each_cluster[i];
		assert(count > 0);
		cnm.cluster_node_map[i] = (int*)malloc(count * sizeof(int));
		if(count > 0 && cnm.cluster_node_map[i] == NULL)
		{
			printf("Out of memory while creating the cluster-node map.\n");
			exit(1);
		}
		/* current_node_idx keeps running across clusters, giving each one a contiguous id range. */
		for(j = 0; j < count; j++, current_node_idx++)
		{
			cnm.node_cluster_map[current_node_idx] = i;
			cnm.cluster_node_map[i][j] = current_node_idx;
		}
	}
	return cnm;
}

/*
 * Share m items among n recipients, round-robin, guided by scores[].
 *
 * Every recipient starts with one item. The remaining m - n items are then
 * dealt one at a time in index order, wrapping around; a recipient is
 * skipped once it holds ceil(m * scores[i] / total) items, where total is
 * sum_vec(scores, n).
 *
 * If the quotas cannot absorb every item -- total is zero, NaN or infinite,
 * or a whole lap passes with every recipient already at its quota -- the
 * rest are dealt round-robin with no quota, so the loop always terminates.
 *
 * n            number of recipients; must be > 0
 * m            number of items; must be >= n
 * scores       n weights, read only
 * num_assigned output, n entries; on return each is >= 1 and they sum to m
 *
 * Preconditions and postconditions are checked with assert(). When asserts
 * are compiled out, n <= 0 returns without writing anything and m < n
 * leaves every recipient with exactly one item.
 */
void rr_assign(int n, int m, double* scores,  int* num_assigned)
{
	assert(n > 0 && m >= n);
	if(n <= 0)
		return;	/* keeps "% n" below well defined when asserts are disabled */

	double sum_score = sum_vec(scores, n);
	/* A zero or non-finite total gives no usable proportions. */
	int ignore_quota = !isfinite(sum_score) || sum_score == 0.0;

	int i;
	for(i = 0; i < n; i++)
		num_assigned[i] = 1;

	int num_item_remained = m - n;
	int t = 0;
	int skipped_in_a_row = 0;
	while(num_item_remained > 0)
	{
		int accept = ignore_quota;
		if(!accept)
		{
			/* Compared as doubles: converting a NaN or out-of-range quota to int is undefined. */
			double quota = ceil((double)m * scores[t] / sum_score);
			accept = (double)num_assigned[t] < quota;
		}
		if(accept)
		{
			num_assigned[t]++;
			num_item_remained--;
			skipped_in_a_row = 0;
		}
		else if(++skipped_in_a_row >= n)
		{
			/* A full lap with nobody below quota: finish without quotas. */
			ignore_quota = 1;
		}
		t = (t + 1) % n;
	}

	int temp_sum = 0;
	for(i = 0; i < n; i++)
	{
		assert(num_assigned[i] > 0);
		temp_sum += num_assigned[i];
	}
	assert(temp_sum == m);
}

/*
 * Release all heap memory owned by an NCMap: the per-node cluster lists,
 * then the three top-level arrays.
 *
 * The struct is received by value, so the caller's copy keeps its (now
 * dangling) pointers and must not be used or freed again.
 */
void free_NCMap(NCMap ncm)
{
	/* Inner arrays first; the order among them does not matter. */
	int node = ncm.num_nodes;
	while(node-- > 0)
	{
		free(ncm.node_cluster_map[node]);
	}

	free(ncm.num_clusters_on_each_node);
	free(ncm.cluster_node_map);
	free(ncm.node_cluster_map);
}

/*
 * Build an NCMap for the case where there are at most as many nodes
 * (processes) as clusters, so that every cluster lives on exactly one node.
 *
 * Each cluster i is weighted by cluster_size_array[i] * nnz_array[i]
 * (an earlier variant used the squared cluster size). The weights are
 * ordered by sort_with_idx() and the clusters are then placed greedily,
 * one at a time, on the node chosen by find_extreme_idx() over the
 * per-node running totals.
 * NOTE(review): both helpers are defined elsewhere; presumably this is
 * "heaviest cluster first onto the least-loaded node" -- confirm the
 * meaning of their flag arguments.
 *
 * num_clusters       number of clusters; must be >= num_nodes
 * num_nodes          number of nodes; must be > 0
 * cluster_size_array num_clusters entries, read only
 * nnz_array          num_clusters entries, read only
 *
 * Returns an NCMap whose arrays are heap-allocated; release it with
 * free_NCMap(). Each node's cluster list is in increasing cluster id.
 * Prints a message and calls exit(1) on invalid counts or when memory
 * cannot be allocated.
 */
NCMap create_NCmap(int num_clusters, int num_nodes, int* cluster_size_array, int* nnz_array)
{
	if(num_nodes > num_clusters)
	{
		printf("Number of processes should be less or equal than the number of clusters.\n");
		exit(1);
	}
	/* Together with the check above this also guarantees num_clusters > 0. */
	if(num_nodes <= 0)
	{
		printf("Number of processes should be positive.\n");
		exit(1);
	}

	NCMap ncm;
	ncm.num_clusters = num_clusters;
	ncm.num_nodes = num_nodes;
	ncm.cluster_node_map = (int*)malloc(num_clusters * sizeof(int));
	ncm.num_clusters_on_each_node = (int*)calloc(num_nodes, sizeof(int));
	ncm.node_cluster_map = (int**)malloc(num_nodes * sizeof(int*));

	/* Scratch buffers live on the heap: their sizes are caller-controlled. */
	double* scores = (double*)malloc(num_clusters * sizeof(double));
	double* scores_for_nodes = (double*)calloc(num_nodes, sizeof(double));
	int* temp_idx_array = (int*)malloc(num_clusters * sizeof(int));
	if(ncm.cluster_node_map == NULL || ncm.num_clusters_on_each_node == NULL
		|| ncm.node_cluster_map == NULL || scores == NULL
		|| scores_for_nodes == NULL || temp_idx_array == NULL)
	{
		printf("Out of memory while creating the node-cluster map.\n");
		exit(1);
	}

	int i;
	for(i = 0; i < num_clusters; i++)
		scores[i] = (double)cluster_size_array[i] * (double)nnz_array[i];

	sort_with_idx(scores, num_clusters, temp_idx_array, 1, 0);

	//greedy assignment
	for(i = 0; i < num_clusters; i++)
	{
		int node = find_extreme_idx(scores_for_nodes, num_nodes, 0);
		/* The result indexes two arrays below, so catch a bad index early. */
		assert(node >= 0 && node < num_nodes);
		ncm.cluster_node_map[temp_idx_array[i]] = node;
		scores_for_nodes[node] += scores[i];
	}
	free(temp_idx_array);
	free(scores_for_nodes);
	free(scores);

	/* First pass: count the clusters on each node (counts start at zero from calloc). */
	for(i = 0; i < num_clusters; i++)
		ncm.num_clusters_on_each_node[ncm.cluster_node_map[i]]++;

	for(i = 0; i < num_nodes; i++)
	{
		int count = ncm.num_clusters_on_each_node[i];
		ncm.node_cluster_map[i] = (int*)malloc(count * sizeof(int));
		/* malloc(0) may legitimately return NULL, so only a non-empty list counts as a failure. */
		if(count > 0 && ncm.node_cluster_map[i] == NULL)
		{
			printf("Out of memory while creating the node-cluster map.\n");
			exit(1);
		}
		/* Reset so the counter can double as the fill position in the next loop. */
		ncm.num_clusters_on_each_node[i] = 0;
	}

	/* Second pass: fill the lists; each counter climbs back to its final value. */
	for(i = 0; i < num_clusters; i++)
	{
		int its_node = ncm.cluster_node_map[i];
		ncm.node_cluster_map[its_node][ncm.num_clusters_on_each_node[its_node]++] = i;
	}
	return ncm;
}
