#!/bin/bash
# This script draws two log-log plots of the vertex-degree distribution of a
# graph file (e.g. zipf data): the raw degree histogram, then a cumulative plot.
# Usage: ./convert <file>
# where file is of the format: 
#	<Header line>
#	<n=num vertices> <m=num edges>
#	<weight 1> [neighbor1] [neighbor2] ... 
#	<weight 2> [neighbor1] [neighbor2] ... 
#	...
#	<weight n> [neighbor1] [neighbor2] ... 

# Abort on command failure, unset variables, or a failing pipeline stage.
set -euo pipefail

# The input file is mandatory; extra arguments are ignored.
if [[ $# -lt 1 ]]; then
  printf 'Usage: %s <file>\n' "${0##*/}" >&2
  exit 2
fi

input=$1
if [[ ! -r "$input" || -d "$input" ]]; then
  printf '%s: cannot read input file: %s\n' "${0##*/}" "$input" >&2
  exit 1
fi

# Intermediate data files live next to the input, named <file>.out and
# <file>.cum; gnuplot shows these names in the plot key.
hist="${input}.out"
cum="${input}.cum"

# Remove the intermediates on every exit path (normal exit, error, Ctrl-C),
# not only when the script runs to completion.
trap 'rm -f -- "$hist" "$cum"' EXIT

# Degree histogram: skip the header line and the "<n> <m>" line, emit the
# neighbor count of each vertex (fields minus the weight), then count how many
# vertices share each degree. Each output line is "<count> <degree>".
# Input is passed via redirection so that odd file names (leading '-',
# embedded '=') are never interpreted by gawk as options or assignments.
gawk 'NR > 2 {print NF-1}' < "$input" | sort -n | uniq -c > "$hist"
printf 'set logscale; plot "%s" using 2:1\n' "$hist" | gnuplot -persist -

# Cumulative data: running sum of the counts, paired with the degree.
gawk '{sum += $1; print sum, $2}' < "$hist" > "$cum"

# Grand total of the counts, kept in a shell variable instead of a scratch
# file in the current directory. "+ 0" yields 0 for an empty histogram.
total=$(gawk '{sum += $1} END {print sum + 0}' < "$hist")

# x = total minus the running sum, y = degree.
# shellcheck disable=SC2016  # $1 below is a gnuplot column reference
printf 'set logscale; plot "%s" using (%s-$1):2\n' "$cum" "$total" | gnuplot -persist -

