# visualizer.py
# Walker M. White (wmw2)
# November 1, 2013
"""Visualization App to verify that K-means works

Visualization is limited to 3d points and k-values < 15"""
import matplotlib
import numpy
import math
import traceback
matplotlib.use('TkAgg')

# Modules to embed matplotlib in a custom Tkinter window
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg, NavigationToolbar2TkAgg
# implement the default mpl key bindings
from matplotlib.backend_bases import key_press_handler
from matplotlib.figure import Figure
from mpl_toolkits.mplot3d import Axes3D

# File support to load data files
import os
import Tkinter as Tk
import tkFileDialog
import tkMessageBox
import tkFont

# The k-means implementation
from a5 import *

# Maximum allowable k-means
MAX_K_VAL = 14


def parse_data(data):
    """Return: 3-element list equivalent to file line

    The first character and the last two characters of the line are dropped,
    the remainder is split on commas, and each piece is converted to a float.
    NOTE(review): this presumably strips an opening bracket and a closing
    bracket plus newline -- confirm against the a5d file format.

    Precondition: data is a line from an a5d data file."""
    return [float(value) for value in data[1:-2].split(',')]


class Visualizer(object):
    """Instance is a visualization app.

    INSTANCE ATTRIBUTES:
        _root:     TCL/TK graphics backend        [TK object]
        _canvas:   MatPlotLib canvas              [FigureCanvas object]
        _axes:     MatPlotLib axes                [Axes object]
        _db:       Data set                       [Database object, or None if not loaded]
        _count:    Number of steps executed       [int >= 0]
        _finished: Whether the computation is done [bool]

    There are several other attributes for GUI widgets (buttons and labels).
    We do not list all of them here."""

    def __init__(self):
        """Initializer: Make a visualization app

        Builds the window, then enters the Tk main loop; this call does not
        return until the main loop exits."""
        self._root = Tk.Tk()
        self._root.wm_title("Assignment 5 Visualizer")
        self._db = None

        # Start the application
        self._config_canvas()
        self._config_control()
        self._canvas.show()
        Tk.mainloop()

    def _config_canvas(self):
        """Load the MatPlotLib drawing code

        Creates the figure canvas on the left side of the window and sets up
        empty 3d axes covering the unit cube."""
        # Create the drawing canvas
        figure = Figure(figsize=(6, 6), dpi=100)
        self._canvas = FigureCanvasTkAgg(figure, master=self._root)
        self._canvas._tkcanvas.pack(side=Tk.LEFT, expand=True, fill=Tk.BOTH)

        # Initialize the scatter plot
        self._axes = figure.gca(projection='3d')
        self._axes.set_xlim((0.0, 1.0))
        self._axes.set_ylim((0.0, 1.0))
        self._axes.set_zlim((0.0, 1.0))
        self._axes.set_xlabel('X')
        self._axes.set_ylabel('Y')
        self._axes.set_zlabel('Z')
        self._axes.set_xticks(numpy.arange(0.0, 1.0, 0.1))
        self._axes.set_yticks(numpy.arange(0.0, 1.0, 0.1))
        self._axes.set_zticks(numpy.arange(0.0, 1.0, 0.1))
        self._axes.tick_params(labelsize=9)

    def _config_control(self):
        """Create the control panel on the right hand side

        This method is WAY too long, but GUI layout code is typically like
        this.  Plus, Tkinter makes this even worse than it should be.

        Besides laying out widgets, this initializes _count to 0 and
        _finished to False, and creates _kval (the selected k, initially 3)."""
        panel = Tk.Frame(master=self._root)
        panel.columnconfigure(0, pad=3)
        panel.columnconfigure(1, pad=3)
        panel.rowconfigure(0, pad=3)
        panel.rowconfigure(1, pad=0)
        panel.rowconfigure(2, pad=23)
        panel.rowconfigure(3, pad=3)
        panel.rowconfigure(4, pad=3)
        panel.rowconfigure(5, pad=3)
        panel.rowconfigure(6, pad=13)

        # Panel title (large bold font) and a divider line beneath it
        title = Tk.Label(master=panel, text='K Means Control', height=3)
        wfont = tkFont.Font(font=title['font'])
        wfont.config(weight='bold', size=20)
        title.grid(row=0, columnspan=2, sticky='we')
        title.config(font=wfont)

        divider = Tk.Frame(master=panel, height=2, bd=1, relief=Tk.SUNKEN)
        divider.grid(row=1, columnspan=2, sticky='we')

        # Label and button for managing files.
        label = Tk.Label(master=panel, text='Data Set: ', height=2)
        # From here on wfont is the bold label font shared by all row labels
        wfont = tkFont.Font(font=label['font'])
        wfont.config(weight='bold')
        label.config(font=wfont)
        label.grid(row=2, column=0, sticky='e')

        self._filebutton = Tk.Button(master=panel, text='<select file>', width=10,
                                     command=self._load)
        self._filebutton.grid(row=2, column=1, sticky='w', padx=(0, 10))

        # Label and option menu to select k-value
        label = Tk.Label(master=panel, text='K Value: ', height=2, font=wfont)
        label.grid(row=3, column=0, sticky='e')

        self._kval = Tk.IntVar(master=self._root)
        self._kval.set(3)
        options = Tk.OptionMenu(panel, self._kval, *range(1, MAX_K_VAL+1),
                                command=self._reset)
        options.grid(row=3, column=1, sticky='w')

        # Label and step indicator
        label = Tk.Label(master=panel, text='At Step: ', height=2, font=wfont)
        label.grid(row=4, column=0, sticky='e')

        self._count = 0
        self._countlabel = Tk.Label(master=panel, text='0')
        self._countlabel.grid(row=4, column=1, sticky='w')

        # Label and convergence indicator
        label = Tk.Label(master=panel, text='Finished: ', height=2, font=wfont)
        label.grid(row=5, column=0, sticky='e')

        self._finished = False
        self._finishlabel = Tk.Label(master=panel, text='False')
        self._finishlabel.grid(row=5, column=1, sticky='w')

        # Control buttons
        button = Tk.Button(master=panel, text='Reset', width=8, command=self._reset)
        button.grid(row=6, column=0, padx=(10, 0))
        button = Tk.Button(master=panel, text='Step', width=8, command=self._step)
        button.grid(row=6, column=1)

        panel.pack(side=Tk.RIGHT, fill=Tk.Y)

    def _plot_clusters(self):
        """Plot every cluster of the data set, one color/marker per cluster

        Draws the contents of clusters 0 .. k-1 (k is the selected K Value)
        and marks each centroid with a larger 'o' in the same color."""
        COLORS = ('r', 'g', 'b', 'k', 'c', 'm', 'y')
        for k in range(self._kval.get()):
            # Cycle colors and alternate markers so neighbors stay distinguishable
            c = COLORS[k % len(COLORS)]
            m = 'x' if k % 2 == 0 else '+'
            cluster = self._db.getCluster(k)
            rows = numpy.array(cluster.getContents())
            cent = cluster.getCentroid()
            if len(rows) > 0:
                self._axes.scatter(rows[:, 0], rows[:, 1], rows[:, 2], c=c, marker=m)
            self._axes.scatter(cent[0], cent[1], cent[2], c=c, s=30, marker='o')

    def _plot_one_cluster(self):
        """Plot the whole data set as a single cluster (Part B fallback)

        Builds one Cluster seeded with the first data point, appends every
        data point to it, and plots its contents and centroid in blue."""
        # Try to show everything in one cluster.
        cluster = Cluster(self._db.getContents()[0])
        for x in self._db.getContents():
            cluster.appendContents(x)

        rows = numpy.array(cluster.getContents())
        cent = cluster.getCentroid()
        if len(rows) > 0:
            self._axes.scatter(rows[:, 0], rows[:, 1], rows[:, 2], c='b', marker='+')
        self._axes.scatter(cent[0], cent[1], cent[2], c='b', s=30, marker='o')

    def _plot_points(self):
        """Plot the raw data points with no clustering (Part A fallback)

        Nothing is drawn when the data set has no points."""
        rows = numpy.array(self._db.getContents())
        # An empty array is 1-dimensional, so column slicing would fail on it
        if len(rows) > 0:
            self._axes.scatter(rows[:, 0], rows[:, 1], rows[:, 2], c='k', marker='+')

    def _rescale_axes(self):
        """Widen each axis to bounds aligned to 0.1 and restore ticks and labels"""
        axes = self._axes
        per_axis = ((axes.get_xbound, axes.set_xlim, axes.set_xticks),
                    (axes.get_ybound, axes.set_ylim, axes.set_yticks),
                    (axes.get_zbound, axes.set_zlim, axes.set_zticks))
        for get_bound, set_lim, set_ticks in per_axis:
            low, high = get_bound()
            bounds = (numpy.floor(low*10)/10.0, numpy.ceil(high*10)/10.0)
            set_lim(bounds)
            set_ticks(numpy.arange(bounds[0], bounds[1], 0.1))

        axes.set_xlabel('X')
        axes.set_ylabel('Y')
        axes.set_zlabel('Z')

    def _plot(self):
        """General plot function

        This function replots the data any time that it changes.  It first
        tries the full cluster plot; if that fails it prints the traceback and
        falls back to a single-cluster plot, and if that fails too it falls
        back to plotting the raw points.

        Only Exception subclasses trigger a fallback, so KeyboardInterrupt
        and SystemExit are not swallowed."""
        assert self._db is not None, 'Invariant Violation: Attempted to plot when data set is None'
        self._axes.clear()

        try:
            # Check if at part C
            if self._db.getKSize() == 0:
                # No clusters yet: install three fixed seed clusters by hand
                self._db._clusters = [None, None, None]  # Create a list of 3 elements
                self._db._clusters[0] = Cluster([0.2, 0.2, 0.2])
                self._db._clusters[1] = Cluster([0.2, 0.8, 0.8])
                self._db._clusters[2] = Cluster([0.8, 0.2, 0.5])
                self._db._ksize = 3
                self._db.partition()
            assert type(self._db.nearest([0.2, 0.2, 0.2])) == Cluster, 'Part C not complete'
            self._plot_clusters()
        except Exception:
            print('FAILED VISUALIZATION: ')
            traceback.print_exc()
            print('')
            print('Attempting Part B Only')
            try:
                self._db._clusters = []
                self._db._ksize = 0
                self._plot_one_cluster()
            except Exception:
                print('FAILED PART B: ')
                traceback.print_exc()
                print('')
                print('Attempting Part A Only')
                self._plot_points()

        # Reset axes information
        self._rescale_axes()
        self._canvas.show()

    def _load(self):
        """Load a data set file into a Database

        Does nothing if the file dialog is cancelled.  Blank lines in the file
        are skipped.  The file button text only changes once the data set has
        been built, so a failed load leaves the previous name showing."""
        filename = tkFileDialog.askopenfilename(initialdir='.', title='Select a Data File',
                                                filetypes=[('A5 Data Files', '.a5d')])
        # A cancelled dialog yields an empty value rather than None
        if not filename:
            return

        with open(filename) as source:
            contents = [parse_data(line) for line in source if line.strip()]

        self._db = Database(3, contents)

        shortname = os.path.split(filename)[1]
        if len(shortname) > 10:
            shortname = shortname[0:10]+'...'
        self._filebutton.configure(text=shortname)

        self._reset()

    def _reset(self, k=0):
        """Reset the k-means calculation with the given k value.

        The k actually used is read from the K Value menu variable; the
        parameter is unused and only absorbs the value Tkinter passes to the
        option-menu callback.  Shows a warning and does nothing else when no
        data set is loaded.

        Precondition: k >= 0 is an int."""
        if self._db is None:
            tkMessageBox.showwarning('Reset', 'ERROR: No data set loaded.')
            return

        self._count = 0
        self._countlabel.configure(text='0')
        self._finished = False
        self._finishlabel.configure(text='False')

        # Student may not have implemented this yet.
        self._db.setKSize(self._kval.get())
        print(self._db.getKSize())
        self._plot()

    def _step(self):
        """Perform one step in k-means clustering

        Shows a warning and does nothing else when no data set is loaded.
        Does nothing once the computation has finished."""
        if self._db is None:
            tkMessageBox.showwarning('Step', 'ERROR: No data set loaded.')
            return
        if self._finished:
            return

        self._count = self._count+1
        self._countlabel.configure(text=str(self._count))
        self._finished = self._db.step()
        self._finishlabel.configure(text=str(self._finished))
        self._plot()


# Script code
if __name__ == '__main__':
    Visualizer()