import networkx
import sys
import MasterFunctions

#input the name of the edge file, read the edges
#
#Each usable line holds two whitespace-separated node labels (any further
#tokens are ignored). Labels are mapped to consecutive integer ids in order
#of first appearance, and the adjacency is stored symmetrically in `edges`.
edge_file = sys.argv[1]

to_add_edges = []   #filled in later, when the graph is built
edges = {}          #node id -> set of neighbouring node ids
node_index = {}     #node label -> node id
reverse_index = {}  #node id -> node label
count = 0           #next free node id; equals the number of nodes when done
nodes = []          #every node id, in order of assignment

#`with` guarantees the file is closed even if parsing raises
with open(edge_file, 'r') as IN:
    for read_line in IN:
        t = read_line.split()
        #skip blank lines and lines with a single token: they do not
        #describe an edge (a lone token used to raise IndexError)
        if len(t) < 2:
            continue
        #self loops are ignored
        if t[0] == t[1]:
            continue
        #index the nodes to save memory
        for label in t[:2]:
            if label not in node_index:
                node_index[label] = count
                reverse_index[count] = label
                edges[count] = set()
                nodes.append(count)
                count += 1
        source = node_index[t[0]]
        target = node_index[t[1]]
        edges[source].add(target)
        edges[target].add(source)

##build the undirected graph over the integer node ids
G = networkx.Graph()
G.add_nodes_from(range(count))

#`edges` stores every link in both directions; keep each undirected edge
#once by only taking the pair whose first id is the smaller one
to_add_edges.extend(
    (source, target)
    for source in edges
    for target in edges[source]
    if source < target
)
G.add_edges_from(to_add_edges)

print('done making graph')

#Read the community file (one community per line, whitespace-separated node
#labels) and write one line of `index:value` features per community.
comm_file = sys.argv[2]
out_file = sys.argv[3]
#optional class label, written in front of every feature line when given
class_num = sys.argv[4] if len(sys.argv) > 4 else None


def _five_point_summary(values):
    """Return the entries at the 0%, 25%, 50%, 75% and 100% positions.

    values must be a non-empty indexable sequence; it is indexed as given,
    so the caller sorts it first when order statistics are wanted. Floor
    division keeps the indices integral.
    """
    size = len(values)
    return [values[0], values[size // 4], values[size // 2],
            values[3 * size // 4], values[size - 1]]


def _community_features(G, H, nodes, edges, num_members):
    """Return the feature values of one connected community, in output order.

    G is the full graph, H the subgraph of G induced by the community,
    nodes the community's integer node ids, edges the adjacency dict of the
    full graph and num_members the number of distinct labels on the
    community's input line. H must be connected (networkx.diameter needs it).
    """
    num_nodes = len(H.nodes())
    in_links = len(H.edges())
    all_links = sum(G.degree(v) for v in nodes)

    ##num vertices, num edges, diameter
    features = [num_members, in_links, networkx.diameter(H)]
    ##edge density: internal edges over the squared number of vertices
    features.append(in_links / float(num_nodes * num_nodes))
    ##conductance: computed here as internal edges over the summed degree
    ##(in the full graph) of the community's members
    features.append(float(in_links) / float(all_links))
    ##transitivity
    features.append(networkx.transitivity(H))
    ##triangle density
    features.append(MasterFunctions.GetTriangleDensity(H))

    ##all pairs shortest path lengths; dict() leaves a dict-of-dicts result
    ##untouched and also accepts an iterator of (node, lengths) pairs
    path_mat = dict(networkx.all_pairs_shortest_path_length(H))
    paths = sorted(
        dist
        for lengths in path_mat.values()
        for dist in lengths.values()
        if 0 < dist < num_nodes
    )
    ##path length 0 / 25 / 50 / 75 / 100
    features.extend(_five_point_summary(paths))

    ##edge betweenness centrality 0 / 25 / 50 / 75 / 100
    edge_cent = sorted(networkx.edge_betweenness_centrality(H).values())
    features.extend(_five_point_summary(edge_cent))

    ##node betweenness centrality 0 / 25 / 50 / 75 / 100
    node_cent = sorted(networkx.betweenness_centrality(H).values())
    features.extend(_five_point_summary(node_cent))

    ##the community together with all of its neighbours in the full graph
    fringe = set(H.nodes())
    for member in H.nodes():
        fringe.update(G.neighbors(member))
    I = G.subgraph(fringe)
    alpha_vals, beta_vals = MasterFunctions.GetABVals(I, H)
    alpha_vals.sort()
    beta_vals.sort()
    ##alpha vals 0 / 25 / 50 / 75 / 100
    features.extend(_five_point_summary(alpha_vals))
    ##beta vals 0 / 25 / 50 / 75 / 100
    features.extend(_five_point_summary(beta_vals))

    #tree sum- number of spanning trees in the graph
    features.append(MasterFunctions.TreeSum(nodes, edges))

    #information centrality; the values are used in the order returned
    #NOTE(review): presumably CurrentFlow returns them sorted - confirm
    features.extend(_five_point_summary(MasterFunctions.CurrentFlow(H)))
    return features


#`with` closes both files even if a community raises part-way through
with open(out_file, 'w') as OUT, open(comm_file, 'r') as IN:
    for count, read_line in enumerate(IN, 1):
        print('On community ' + str(count))
        sys.stdout.flush()
        members = set(read_line.split())
        #raises KeyError for a label that never appeared in the edge file
        nodes = [node_index[x] for x in members]
        #communities with fewer than two members produce no output line
        if len(members) < 2:
            continue
        #get the subgraph corresponding to the community
        H = G.subgraph(nodes)
        if networkx.number_connected_components(H) != 1:
            OUT.write("Set is not connected\n")
            continue
        features = _community_features(G, H, nodes, edges, len(members))
        fields = [str(idx) + ':' + str(val)
                  for idx, val in enumerate(features)]
        line = '\t'.join(fields) + '\n'
        if class_num is not None:
            line = class_num + '\t' + line
        #written in one call so a failure never leaves a partial line
        OUT.write(line)
