# a5test.py
# Walker M. White (wmw2)
# November 1, 2013
"""Unit test for k-means clustering"""
import cornelltest  # Make sure you have NEW version
import random
import numpy

# The module to test.
from a5 import *

# NOTE(review): the expected centroids/contents in testD, testE and
# test_best_cluster depend on the exact sequence produced by the seeded random
# number generator; presumably they were recorded under the interpreter version
# the assignment targeted -- confirm before running under a different one.


def testA():
    """Test Part A (of Part I) of the assignment.

    Checks the basic Database methods: getDimension, getKSize, getContents,
    appendContents and clearContents, for both an empty database and one
    created from a user-supplied list of points.

    This test procedure cannot test getCluster, as there are no clusters yet.
    That test is moved to part B."""
    print(' Testing Part A')

    # TEST CASE 1
    # Create and test an empty database
    dbase = Database(3)
    cornelltest.assert_equals(3, dbase.getDimension())
    cornelltest.assert_equals(0, dbase.getKSize())

    # We use this BRAND NEW ASSERT to compare lists
    cornelltest.assert_float_lists_equal([], dbase.getContents())

    # Add something to the database (and check it was added)
    dbase.appendContents([0.0, 0.5, 4.2])
    # Database is a 2D-list.
    cornelltest.assert_float_lists_equal([[0.0, 0.5, 4.2]], dbase.getContents())

    # And clear it
    dbase.clearContents()
    cornelltest.assert_float_lists_equal([], dbase.getContents())
    print(' Default initialization looks okay')

    # TEST CASE 2
    # Create and test a non-empty database
    items = [[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]
    dbase = Database(3, items)
    cornelltest.assert_equals(3, dbase.getDimension())
    cornelltest.assert_equals(0, dbase.getKSize())

    # Check that contents is initialized correctly
    # Make sure items is COPIED
    cornelltest.assert_float_lists_equal(items, dbase.getContents())
    cornelltest.assert_not_equals(id(items), id(dbase.getContents()))

    # Add something to the database (and check it was added)
    extra = [0.0, 0.5, 4.2]
    dbase.appendContents(extra)
    items.append(extra)
    cornelltest.assert_float_lists_equal(items, dbase.getContents())
    # Check the point was COPIED
    cornelltest.assert_false(id(extra) in map(id, dbase.getContents()))

    # And clear it
    dbase.clearContents()
    cornelltest.assert_float_lists_equal([], dbase.getContents())
    print(' User-given contents looks okay')
    print(' Part A appears correct')


def testB():
    """Test Part B (of Part I) of the assignment.

    Checks the basic Cluster methods (getCentroid, getContents,
    appendContents, clearContents). This test procedure also includes
    getCluster from part A.

    To test getCluster, we have to initialize your _clusters attribute. We
    can only do this by accessing the hidden attribute _clusters in this
    function. Normally, this is bad programming (hidden attributes can be
    used inside of the class definition, but not outside). But sometimes
    rules are meant to be broken, and testing is a good time to break rules."""
    print(' Testing Part B')

    # TEST CASE 1
    # Create and test a cluster (always empty)
    point = [0.0, 1.0, 0.0]
    cluster1 = Cluster(point)

    # Compare centroid and contents
    cornelltest.assert_float_lists_equal(point, cluster1.getCentroid())
    cornelltest.assert_float_lists_equal([], cluster1.getContents())
    # Make sure centroid COPIED.
    # The comparison must be against the centroid itself; comparing against
    # the contents list (a different object by construction) could never fail.
    cornelltest.assert_not_equals(id(point), id(cluster1.getCentroid()))

    # Add something to cluster (and check it was added)
    extra = [0.0, 0.5, 4.2]
    cluster1.appendContents(extra)
    # Cluster is a 2D-list.
    cornelltest.assert_float_lists_equal([extra], cluster1.getContents())
    # Check the point was COPIED
    cornelltest.assert_false(id(extra) in map(id, cluster1.getContents()))

    # And clear it
    cluster1.clearContents()
    cornelltest.assert_float_lists_equal([], cluster1.getContents())
    print(' Basic cluster methods look okay')

    # TEST CASE 2 (getCluster)
    # Make a second cluster
    cluster2 = Cluster([0.0, 0.0, 0.0])

    # Now make a database and put these in _clusters attribute
    dbase = Database(3)
    # THIS VIOLATES GOOD PROGRAMMING. But sometimes rules must be broken.
    dbase._clusters = [cluster1, cluster2]
    dbase._ksize = 2

    # Check that I get the right objects back
    # MUST COMPARE FOLDER IDENTIFIERS. Use the id function.
    cornelltest.assert_equals(id(cluster1), id(dbase.getCluster(0)))
    cornelltest.assert_equals(id(cluster2), id(dbase.getCluster(1)))
    print(' Method getCluster() looks okay')
    print(' Part B appears correct')


def testC():
    """Test Part C (of Part I) of the assignment.

    This test checks the Cluster method distance and the Database methods
    nearest and partition. For these checks, it has to go ahead and
    initialize some clusters (which is done in Part D). We do this by
    accessing your hidden attributes. Normally, this is bad programming
    (hidden attributes can be used inside of the class definition, but not
    outside). But sometimes rules are meant to be broken, and testing is a
    good time to break rules."""
    print(' Testing Part C')

    # FOR BOTH TEST CASES
    # Create and test a non-empty database
    items = [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 1.0]]
    dbase = Database(3, items)

    # Create two clusters
    cluster1 = Cluster([0.5, 0.5, 0.0])
    cluster2 = Cluster([0.0, 0.0, 0.5])

    # Initialize the database to use these clusters (access hidden attributes)
    # THIS VIOLATES GOOD PROGRAMMING. But sometimes rules must be broken.
    dbase._clusters = [None, None]  # Make sure the list can hold two clusters
    dbase._clusters[0] = cluster1
    dbase._clusters[1] = cluster2
    dbase._ksize = 2

    # TEST CASE 1 (distance)
    dist = cluster1.distance([1.0, 0.0, -1.0])
    cornelltest.assert_floats_equal(1.22474487139, dist)

    # TEST CASE 2 (distance)
    dist = cluster1.distance([0.5, 0.5, 0.0])
    cornelltest.assert_floats_equal(0.0, dist)
    print(' Method distance() looks okay')

    # TEST CASE 3 (nearest)
    nearest = dbase.nearest([1.0, 0.0, 0.0])
    cornelltest.assert_equals(id(cluster1), id(nearest))

    # TEST CASE 4 (nearest)
    nearest = dbase.nearest([0.0, 0.0, 1.0])
    cornelltest.assert_equals(id(cluster2), id(nearest))
    print(' Method nearest() looks okay')

    # TEST CASE 5 (partition)
    dbase.partition()
    # First half of list is in first cluster
    cornelltest.assert_float_lists_equal(items[:2], cluster1.getContents())
    # Second half of list is in second cluster
    cornelltest.assert_float_lists_equal(items[2:], cluster2.getContents())

    # TEST CASE 6 (partition)
    # Change the clusters
    dbase._clusters[0].setCentroid([0.0, 0.0, 0.5])
    dbase._clusters[1].setCentroid([0.5, 0.5, 0.0])
    dbase.partition()
    # Second half of list is in first cluster
    cornelltest.assert_float_lists_equal(items[2:], cluster1.getContents())
    # First half of list is in second cluster
    cornelltest.assert_float_lists_equal(items[:2], cluster2.getContents())
    print(' Method partition() looks okay')
    print(' Part C appears correct')


def testD():
    """Test Part D (of Part I) of the assignment.

    This test procedure shows why we are providing you with the unit tests,
    rather than asking you to write your own. The method setKSize() has a
    randomization element inside of it. It is hard to check random things,
    because you do not get the same answer each time. To get around that, we
    use the function random.seed(). This function essentially turns off the
    random number generator, and makes it return predictable values. For more
    information, see http://en.wikipedia.org/wiki/Pseudorandomness."""
    print(' Testing Part D (setKSize)')

    # Force the random number generator to not be random
    random.seed(1)

    # FOR BOTH TEST CASES
    # Create and test a non-empty database
    items = [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 1.0]]
    dbase = Database(3, items)

    # TEST CASE 1 (Change k)
    dbase.setKSize(0)
    cornelltest.assert_equals(0, dbase.getKSize())
    cornelltest.assert_float_lists_equal([], dbase._clusters)

    # TEST CASE 2 (Change k)
    dbase.setKSize(2)
    # Should create two clusters
    cornelltest.assert_equals(2, dbase.getKSize())

    # Check first cluster
    cluster1 = dbase.getCluster(0)
    cornelltest.assert_float_lists_equal([1.0, 0.0, 0.0], cluster1.getCentroid())
    cornelltest.assert_float_lists_equal(items[:3], cluster1.getContents())

    # Check second cluster
    cluster2 = dbase.getCluster(1)
    cornelltest.assert_float_lists_equal([0.0, 0.0, 1.0], cluster2.getCentroid())
    cornelltest.assert_float_lists_equal(items[3:], cluster2.getContents())

    # TEST CASE 3 (Change k)
    dbase.setKSize(3)
    # Should create three clusters
    cornelltest.assert_equals(3, dbase.getKSize())

    # Check first cluster
    cluster1 = dbase.getCluster(0)
    cornelltest.assert_float_lists_equal([0.0, 0.0, 1.0], cluster1.getCentroid())
    cornelltest.assert_float_lists_equal(items[3:4], cluster1.getContents())

    # Check second cluster
    cluster2 = dbase.getCluster(1)
    cornelltest.assert_float_lists_equal([0.0, 1.0, 0.0], cluster2.getCentroid())
    cornelltest.assert_float_lists_equal(items[1:2], cluster2.getContents())

    # Check third cluster
    cluster3 = dbase.getCluster(2)
    cornelltest.assert_float_lists_equal([0.0, 0.0, 0.0], cluster3.getCentroid())
    cornelltest.assert_float_lists_equal(items[0:1] + items[2:3],
                                         cluster3.getContents())
    print(' Part D appears correct')


def testE():
    """Test Part E (of Part I) of the assignment.

    This tests the final part of K-means (the Cluster method update and the
    Database method step). It gets a lot easier from here. As with the test
    for Part D, we have to use random.seed to fix the random number
    generator."""
    print(' Testing Part E')

    # Force the random number generator to not be random
    random.seed(3)  # More interesting result than a seed of 1

    # FOR ALL TEST CASES
    # Create and initialize a non-empty database
    items = [[0.5, 0.5, 0.5], [0.5, 0.6, 0.6], [0.6, 0.5, 0.6],
             [0.5, 0.6, 0.5], [0.5, 0.4, 0.5], [0.5, 0.4, 0.4]]
    dbase = Database(3, items)
    dbase.setKSize(2)

    # PRE-TEST: Check first cluster (should be okay if passed part D)
    cluster1 = dbase.getCluster(0)
    cornelltest.assert_float_lists_equal([0.5, 0.6, 0.6], cluster1.getCentroid())
    cornelltest.assert_float_lists_equal(items[1:3], cluster1.getContents())

    # PRE-TEST: Check second cluster (should be okay if passed part D)
    cluster2 = dbase.getCluster(1)
    cornelltest.assert_float_lists_equal([0.5, 0.6, 0.5], cluster2.getCentroid())
    cornelltest.assert_float_lists_equal([items[0]] + items[3:],
                                         cluster2.getContents())

    # Make a copy of a cluster (to test update() method)
    clustertest = Cluster(cluster1.getCentroid())
    for point in cluster1.getContents():
        clustertest.appendContents(point)

    # TEST CASE 1 (update)
    stable = clustertest.update()
    cornelltest.assert_float_lists_equal([0.55, 0.55, 0.6],
                                         clustertest.getCentroid())
    cornelltest.assert_false(stable)  # Not yet stable

    # TEST CASE 2 (update)
    stable = clustertest.update()
    cornelltest.assert_float_lists_equal([0.55, 0.55, 0.6],
                                         clustertest.getCentroid())
    cornelltest.assert_true(stable)  # Now it is stable
    print(' Method update() looks okay')

    # TEST CASE 3 (step)
    dbase.step()

    # K size should be unchanged
    cornelltest.assert_equals(2, dbase.getKSize())

    # Check first cluster (WHICH HAS CHANGED!)
    cluster1 = dbase.getCluster(0)
    cornelltest.assert_float_lists_equal([0.55, 0.55, 0.6],
                                         cluster1.getCentroid())
    cornelltest.assert_float_lists_equal(items[1:4], cluster1.getContents())

    # Check second cluster (WHICH HAS CHANGED!)
    cluster2 = dbase.getCluster(1)
    cornelltest.assert_float_lists_equal([0.5, 0.475, 0.475],
                                         cluster2.getCentroid())
    cornelltest.assert_float_lists_equal([items[0]] + items[4:],
                                         cluster2.getContents())
    print(' Method step() looks okay')
    print(' Part E appears correct')


def testF():
    """Test Part F (of Part I) of the assignment.

    This test procedure checks out the simple additions to class Cluster to
    support market analysis (ratings and str support). It is perhaps the most
    straightforward unit test of the lot."""
    print(' Testing Part F')

    # For all unit tests, create a new cluster.
    cluster = Cluster([0.0, 0.0, 0.0])

    # TEST CASE 1 (rating)
    cornelltest.assert_equals(None, cluster.getRating())
    cluster.addRating(1.0)
    cornelltest.assert_floats_equal(1.0, cluster.getRating())
    cluster.addRating(0.5)
    cornelltest.assert_floats_equal(0.75, cluster.getRating())
    cluster.addRating(0.6)
    cornelltest.assert_floats_equal(0.7, cluster.getRating())
    cluster.clearRating()
    cornelltest.assert_equals(None, cluster.getRating())
    print(' Rating methods look ok')

    # TEST CASE 2 (str)
    cornelltest.assert_equals(str([0.0, 0.0, 0.0]), str(cluster))
    # repr() is the function form of the backtick operator; it calls the
    # same __repr__ method.
    cornelltest.assert_equals(str(cluster.__class__) + str([0.0, 0.0, 0.0]),
                              repr(cluster))
    cluster.setCentroid([0.0, 0.5, 4.2])
    cornelltest.assert_equals(str([0.0, 0.5, 4.2]), str(cluster))
    print(' str support look ok')
    print(' Part F appears correct')


def test_best_cluster():
    """Test the analysis portion of the assignment.

    We only have one test case for this part of the assignment. As everything
    else is working, one should be enough."""
    print(' Testing best_cluster on small_candy.txt')
    random.seed(2)
    cluster = best_cluster('small_candy.txt', 3, 20, 0.25)

    centroid = [0.6730769230769229, 0.26384615384615384,
                0.5303846153846155, 0.27538461538461534]
    contents = [[0.73, 0.31, 0.15, 0.08], [0.77, 0.45, 0.31, 0.31],
                [0.39, 0.14, 0.99, 0.24], [0.65, 0.05, 0.39, 0.49],
                [0.96, 0.09, 0.49, 0.30], [0.86, 0.03, 0.30, 0.39],
                [0.79, 0.09, 0.41, 0.69], [0.65, 0.24, 0.63, 0.27],
                [0.80, 0.40, 0.23, 0.33], [0.39, 0.38, 0.85, 0.32],
                [0.38, 0.07, 0.82, 0.01], [0.66, 0.09, 0.69, 0.46],
                [0.95, 0.62, 0.28, 0.01], [0.62, 0.24, 0.77, 0.17],
                [0.73, 0.65, 0.23, 0.02], [0.72, 0.55, 0.10, 0.17],
                [0.61, 0.42, 0.24, 0.33], [0.46, 0.35, 0.96, 0.05],
                [0.62, 0.01, 0.88, 0.10], [0.58, 0.37, 0.90, 0.08],
                [0.90, 0.05, 0.34, 0.41], [0.90, 0.41, 0.27, 0.36],
                [0.67, 0.32, 0.66, 0.20], [0.72, 0.14, 0.63, 0.37],
                [0.63, 0.05, 0.52, 0.63], [0.36, 0.34, 0.75, 0.37]]

    cornelltest.assert_float_lists_equal(centroid, cluster.getCentroid())
    cornelltest.assert_float_lists_equal(contents, cluster.getContents())
    print(' best_cluster appears to be working correctly')


if __name__ == '__main__':
    # Run every part in assignment order.
    print('Starting unit test')
    testA()
    testB()
    testC()
    testD()
    testE()
    testF()
    test_best_cluster()
    print('The module a5.py is working correctly')