
import java.util.*;

public class MST  {

	/**
	 * Documents added so far. The position of a document in this list is
	 * the integer id used for it throughout the similarity computation and
	 * the union-find structure.
	 */
	List<Document> docs;

	/** Creates an instance holding no documents. */
	public MST(){
		docs = new ArrayList<Document>();
	}

	/**
	 * Appends a document to the collection.
	 *
	 * @param d the document to add
	 * @return true iff the addition was successful
	 */
	public boolean add(Document d){
		return docs.add(d);
	}

	/**
	 * Get the documents in the set.
	 *
	 * @return a newly created Set built from the documents added so far;
	 *         changes to the returned set do not affect this object
	 */
	public Set<Document> getDocuments(){
		return new HashSet<Document>(docs);
	}

	/**
	 * A weighted edge between two documents: the indices of both documents
	 * in {@code docs} and their similarity. The natural ordering is by
	 * ascending similarity.
	 *
	 * Declared static because it never uses the enclosing MST instance.
	 */
	static class SimilarityInfo implements Comparable<SimilarityInfo>{
		/** Similarity of the pair; a raw count at first, normalized later. */
		float sim;
		/** Index of the first document of the pair. */
		int a;
		/** Index of the second document of the pair. */
		int b;

		/**
		 * @param first  index of the first document
		 * @param second index of the second document
		 * @param s      similarity between the two documents
		 */
		public SimilarityInfo(int first, int second, float s){
			a=first;
			b=second;
			sim=s;
		}

		/**
		 * Orders edges by ascending similarity.
		 *
		 * Float.compare is used instead of testing the sign of a
		 * subtraction so that the ordering stays a consistent total order
		 * even if a similarity is NaN.
		 *
		 * @param other the edge to compare with
		 * @return a negative, zero or positive value as this similarity is
		 *         less than, equal to or greater than the other one
		 */
		@Override
		public int compareTo(SimilarityInfo other) {
			return Float.compare(sim, other.sim);
		}
	}

	/**
	 * Builds one SimilarityInfo for every pair of documents (i, j), i &lt; j,
	 * that have at least one word in common. Pairs without a common word
	 * produce no entry.
	 *
	 * The raw score of a pair is incremented once for each word returned by
	 * getWords() of document i that document j also uses (whether getWords()
	 * can return the same word more than once is not visible here). Each raw
	 * score is then divided by sqrt(t[i]*t[j]) + 0.001, where t[x] is the sum
	 * of the raw scores of all pairs that document x takes part in.
	 *
	 * @return the list of normalized pair similarities, in no particular order
	 */
	private List<SimilarityInfo> computeSimilarities() {
		int n = docs.size();
		/* totals[x]: sum of the raw scores of every pair involving document x */
		float[] totals = new float[n];
		List<SimilarityInfo> pairs = new ArrayList<SimilarityInfo>();

		/* Inverted index: for each word, the indices of the documents using it */
		Map<String,Set<Integer>> docsUsing = new HashMap<String,Set<Integer>>();
		for(int i=0; i<n; i++){
			for(String word: docs.get(i).getWords()){
				Set<Integer> users = docsUsing.get(word);
				if (users == null){
					users = new HashSet<Integer>();
					docsUsing.put(word,users);
				}
				users.add(i);
			}
		}

		/* Compute the raw similarity between documents and, at the same
		 * time, accumulate for every document the sum of its similarities
		 * over all other documents.
		 */
		for(int i=0; i<n; i++){
			/* other document index -> number of matches with document i */
			Map<Integer,Integer> score = new HashMap<Integer,Integer>();
			for(String word: docs.get(i).getWords()){
				for(int other: docsUsing.get(word)){
					/* Only look at higher indices so each unordered pair is
					 * handled once and a document is never paired with itself. */
					if(other<=i) continue;
					Integer count = score.get(other);
					if (count==null){
						score.put(other, 1);
					}
					else{
						score.put(other, count+1);
					}
				}
			}
			for (Map.Entry<Integer, Integer> e: score.entrySet()){
				int j = e.getKey();
				int shared = e.getValue();
				pairs.add(new SimilarityInfo(i,j,shared));
				totals[i]+=shared;
				totals[j]+=shared;
			}
		}

		/* Normalize. The small constant keeps the denominator strictly positive. */
		for(SimilarityInfo p: pairs){
			p.sim /= Math.sqrt(totals[p.a]*totals[p.b])+0.001;
		}
		return pairs;
	}

	/**
	 * Clusters the documents by processing the document pairs from the most
	 * similar to the least similar and linking the two clusters of a pair
	 * whenever they are still different (a greedy, Kruskal-style pass). A
	 * message is printed to standard output for every link performed.
	 *
	 * Documents that share no word with any other document never appear in
	 * a pair and are therefore never linked by this method.
	 *
	 * NOTE(review): this relies on UnionFind.find returning an element index
	 * in [0, docs.size()) that identifies the cluster, since the result is
	 * used to look up a document title - confirm against UnionFind.
	 *
	 * @return the union-find structure after all links have been made
	 */
	UnionFind spanningTree() {

		/* Run the minimum spanning tree algorithm */
		/* Initialize some data structures */
		int n = docs.size();
		UnionFind uf = new UnionFind(n);

		List<SimilarityInfo> si = computeSimilarities();
		/* Sort the list of similarities (ascending) */
		Collections.sort(si);

		/* Greedily pick similarities, highest first: walk the sorted list backwards */
		for(int k=si.size()-1; k>=0; k--){
			SimilarityInfo edge = si.get(k);
			int i = uf.find(edge.a);
			int j = uf.find(edge.b);
			if(i==j) /* Already in the same cluster: nothing to do */
				continue;
			System.out.println("Linking cluster of \""+docs.get(i).getTitle()+"\" with cluster of \""+docs.get(j).getTitle()+"\"");
			uf.link(i, j);
		}
		return uf;
	}
}
