"""
A unit test for k-means clustering

This unit test is not complete.  It is enough to help guide you with this assignment,
but you should not rely on it catching all possible errors.  You might want to add
other tests, but this is not required.

Authors: Walker M. White (wmw2), Steve Marschner (srm2)
Date: October 18, 2018
"""
import introcs
import random
import numpy
import tools
import os, os.path

# The modules to test.
import a6dataset
import a6cluster
import a6algorithm

# Helper function for latter tests
TEST_FILE = 'data/candy.csv'


def assert_point_sets_equal(expected,received):
    """
    Quits if the lists of points ``expected`` and ``received`` differ

    This function takes two lists of points and compares them using functions
    from the numerical  package ``numpy``.  This is a scientific computing
    package that allows us to test if numbers are "close enough".

    :param expected: The value you expect the test to have
    :type expected:  ``list`` or ``tuple``

    :param received: The value the test actually had
    :type received:  ``list`` or ``tuple``
    """
    import numpy
    if not type(expected) in [list,tuple]:
        msg = ('assert_point_sets_equal: first argument %s is not a sequence' % repr(expected))
        introcs.quit_with_error(msg)
    elif not type(received) in [list,tuple]:
        msg = ('assert_point_sets_equal: second argument %s is not a sequence' % repr(received))
        introcs.quit_with_error(msg)
    elif sum(map(lambda x : 0 if type(x) in [list,tuple] else 1,expected)) > 0:
        msg = ( 'assert_point_sets_equal: first argument %s is not 2-dimensional' % repr(expected))
        introcs.quit_with_error(msg)
    elif sum(map(lambda x : 0 if type(x) in [list,tuple] else 1,received)) > 0:
        msg = ( 'assert_point_sets_equal: second argument %s is not 2-dimensional' % repr(received))
        introcs.quit_with_error(msg)
    elif any([sum(map(lambda x : 0 if type(x) in [int,float] else 1,item)) > 0 for item in expected]) :
        msg = ( 'assert_point_sets_equal: first argument %s has non-numeric values' % repr(expected))
        introcs.quit_with_error(msg)
    elif any([sum(map(lambda x : 0 if type(x) in [int,float] else 1,item)) > 0 for item in received]) :
        msg = ( 'assert_point_sets_equal: second argument %s has non-numeric values' % repr(received))
        introcs.quit_with_error(msg)
    elif len(expected) != len(received):
        msg = ( 'assert_point_sets_equal: sequences %s and %s have different sizes' %
               (repr(expected),repr(received)))
        introcs.quit_with_error(msg)

    test = True
    try:
        test = numpy.allclose(expected,received)
    except Exception as e:
        msg = 'assert_point_sets_equal: sequences %s and %s are not comparable' % (repr(expected),repr(received))
        introcs.quit_with_error(msg)
    if (not test):
        msg = 'assert_point_sets_equal: expected %s but instead got %s' % (repr(expected),repr(received))
        introcs.quit_with_error(msg)


#        return a6.Algorithm(dataset, k, seeds)


def test_dataset():
    """
    Tests the Dataset class.
    """
    print('  Testing class Dataset')

    # TEST CASE 1
    # Create and test an empty dataset
    dset1 = a6dataset.Dataset(3)
    introcs.assert_equals(3,dset1.getDimension())
    introcs.assert_equals(0,dset1.getSize())

    # We use this assert function to compare lists
    introcs.assert_float_lists_equal([],dset1.getContents())

    print('    Default initialization looks okay')

    # TEST CASE 2
    # Create and test a non-empty dataset
    items = [[0.0,0.0,0.0],[1.0,0.0,0.0],[0.0,1.0,0.0],[0.0,0.0,1.0]]
    dset2 = a6dataset.Dataset(3,items)
    introcs.assert_equals(3,dset2.getDimension())
    introcs.assert_equals(4,dset2.getSize())

    # Check that contents is initialized correctly
    # Make sure items is COPIED
    assert_point_sets_equal(items,dset2.getContents())
    introcs.assert_false(dset2.getContents() is items)
    introcs.assert_false(dset2.getContents()[0] is items[0])

    print('    User-provided initialization looks okay')

    # Check that getPoint() is correct AND that it copies
    introcs.assert_float_lists_equal([0.0,1.0,0.0],dset2.getPoint(2))
    introcs.assert_false(dset2.getContents()[2] is dset2.getPoint(2))

    print('    Method Dataset.getPoint looks okay')

    # Add something to the dataset (and check it was added)
    dset1.addPoint([0.0,0.5,4.2])
    assert_point_sets_equal([[0.0,0.5,4.2]],dset1.getContents())
    introcs.assert_float_lists_equal([0.0,0.5,4.2],dset1.getPoint(0))
    # Check the point is COPIED
    introcs.assert_false(dset1.getPoint(0) is dset1.getContents()[0])

    extra = [0.0,0.5,4.2]
    dset2.addPoint(extra)
    items.append(extra)
    assert_point_sets_equal(items,dset2.getContents())
    # Check the point was COPIED
    introcs.assert_false(id(extra) in map(id,dset2.getContents()))

    print('    Method Dataset.addPoint looks okay')
    print('  class Dataset appears correct')
    print()


def test_cluster_a():
    """
    Tests Part A of the Cluster class assignment.
    """
    print('  Testing Part A of class Cluster')

    # TEST CASE 1
    # Create and test a cluster (always empty)
    dset = a6dataset.Dataset(3)
    point = [0.0,1.0,0.0]
    cluster1 = a6cluster.Cluster(dset, point)

    # Compare centroid and contents
    introcs.assert_float_lists_equal(point,cluster1.getCentroid())
    introcs.assert_equals([],cluster1.getIndices())
    # Make sure centroid COPIED
    introcs.assert_not_equals(id(point),id(cluster1.getCentroid()))

    print('    Basic cluster methods look okay')

    # Add something to cluster (and check it was added)
    extra = [[0.0,0.5,4.2],[0.0,1.0,0.0]]
    dset.addPoint(extra[0])
    dset.addPoint(extra[1])
    cluster1.addIndex(1)
    introcs.assert_equals([1],cluster1.getIndices())
    cluster1.addIndex(0)
    introcs.assert_equals([1,0],cluster1.getIndices())
    # Make sure we can handle duplicates!
    cluster1.addIndex(1)
    introcs.assert_equals([1,0],cluster1.getIndices())

    print('    Method Cluster.addIndex look okay')

    # And clear it
    contents = cluster1.getContents()
    introcs.assert_equals(2,len(contents))
    introcs.assert_float_lists_equal(extra[1],contents[0])
    introcs.assert_float_lists_equal(extra[0],contents[1])

    print('    Method Cluster.getContents look okay')

    # And clear it
    cluster1.clear()
    introcs.assert_equals([],cluster1.getIndices())

    print('    Method Cluster.clear look okay')
    print('  Part A of class Cluster appears correct')
    print()


def test_cluster_b():
    """
    Tests Part B of the Cluster class assignment.
    """
    print('  Testing Part B of class Cluster')

    # A dataset with four points
    items = [[1.0,0.0,0.0],[0.0,1.0,0.0],[0.0,0.0,0.0],[0.0,0.0,1.0]]
    dset = a6dataset.Dataset(3, items)

    # Create some clusters
    cluster1 = a6cluster.Cluster(dset, [0.0,0.0,0.2])
    cluster2 = a6cluster.Cluster(dset, [0.5,0.5,0.0])
    cluster3 = a6cluster.Cluster(dset, [0.0,0.0,0.5])

    # TEST CASE 1 (distance)
    dist = cluster2.distance([1.0,0.0,-1.0])
    introcs.assert_floats_equal(1.22474487139,dist)

    # TEST CASE 2 (distance)
    dist = cluster2.distance([0.5,0.5,0.0])
    introcs.assert_floats_equal(0.0,dist)

    # TEST CASE 3 (distance)
    dist = cluster3.distance([0.5,0.0,0.5])
    introcs.assert_floats_equal(0.5,dist)
    print('    Method Cluster.distance() looks okay')

    # Add some indices
    cluster1.addIndex(0)
    cluster1.addIndex(1)
    cluster2.addIndex(0)
    cluster2.addIndex(1)
    cluster3.addIndex(0)
    cluster3.addIndex(1)
    cluster3.addIndex(2)

    # TEST CASE 1 (radius)
    rads = cluster1.getRadius()
    introcs.assert_floats_equal(1.0198039,rads)

    # TEST CASE 2 (radius)
    rads = cluster2.getRadius()
    introcs.assert_floats_equal(0.7071068,rads)

    # TEST CASE 3 (radius)
    rads = cluster3.getRadius()
    introcs.assert_floats_equal(1.1180340,rads)
    print('    Method Cluster.getRadius() looks okay')

    # TEST CASE 1 (updateCentroid): centroid remains the same
    stable = cluster2.update()
    introcs.assert_float_lists_equal([0.5, 0.5, 0.0], cluster2.getCentroid())
    introcs.assert_true(stable)

    # TEST CASE 2 (updateCentroid): centroid changes
    cluster2.addIndex(2)
    cluster2.addIndex(3)
    stable = cluster2.update()
    introcs.assert_float_lists_equal([0.25, 0.25, 0.25], cluster2.getCentroid())
    introcs.assert_false(stable)
    # updating again without changing points: centroid stable
    stable = cluster2.update()
    introcs.assert_float_lists_equal([0.25, 0.25, 0.25], cluster2.getCentroid())
    introcs.assert_true(stable)

    print('    Method Cluster.update() looks okay')
    print('  Part B of class Cluster appears correct')
    print()


def test_algorithm_a():
    """
    Tests Part A of the Algorithm class.
    """
    print('  Testing Part A of class Algorithm')

    # A dataset with four points almost in a square
    items = [[0.,0.], [10.,1.], [10.,10.], [0.,9.]]
    dset = a6dataset.Dataset(2, items)

    # Test creating a clustering with random seeds
    km = a6algorithm.Algorithm(dset, 3)
    # Should have 3 clusters
    introcs.assert_equals(len(km.getClusters()), 3)
    for clust in km.getClusters():
        # cluster centroids should have been chosen from items
        introcs.assert_true(clust.getCentroid() in items)
        # cluster centroids should be distinct (since items are)
        for clust2 in km.getClusters():
            if clust2 is not clust:
                introcs.assert_float_lists_not_equal(clust.getCentroid(), clust2.getCentroid())

    print('    Random Algorithm initialization looks okay')

    # Clusterings of that dataset, with two and three deterministic clusters
    km = a6algorithm.Algorithm(dset, 2, [0,2])
    introcs.assert_equals(items[0], km.getClusters()[0].getCentroid())
    introcs.assert_equals(items[2], km.getClusters()[1].getCentroid())
    km = a6algorithm.Algorithm(dset, 3, [0,2,3])
    introcs.assert_equals(items[0], km.getClusters()[0].getCentroid())
    introcs.assert_equals(items[2], km.getClusters()[1].getCentroid())
    introcs.assert_equals(items[3], km.getClusters()[2].getCentroid())

    # Try it on a file
    file = os.path.join(os.path.split(__file__)[0],TEST_FILE)
    data = a6dataset.Dataset(4, tools.data_for_file(file))
    km2  = a6algorithm.Algorithm(data, 3, [23, 54, 36])
    introcs.assert_equals([0.38, 0.94, 0.53, 0.07], km2.getClusters()[0].getCentroid())
    introcs.assert_equals([0.84, 0.88, 0.04, 0.86], km2.getClusters()[1].getCentroid())
    introcs.assert_equals([0.8, 0.4, 0.23, 0.33],   km2.getClusters()[2].getCentroid())

    print('    Seeded Algorithm initialization looks okay')
    print('  Part A of class Algorithm appears correct')
    print()


def test_algorithm_b():
    """
    Tests Part B of the Algorithm class.
    s"""
    # This function tests the methods _nearest and _partition, both of which are hidden
    # methods.  Normally it's not good form to directly call these methods from outside
    # the class, but we make an exception for testing code, which often has to be more
    # tightly integrated with the implementation of a class than other code that just
    # uses the class.
    print('  Testing Part B of class Algorithm')
    # Reinitialize data set
    items = [[0.,0.], [10.,1.], [10.,10.], [0.,9.]]
    dset = a6dataset.Dataset(2, items)
    km1 = a6algorithm.Algorithm(dset, 2, [0,2])
    km2 = a6algorithm.Algorithm(dset, 3, [0,2,3])

    nearest = km1._nearest([1.,1.])
    introcs.assert_true(nearest is km1.getClusters()[0])

    nearest = km1._nearest([1.,10.])
    introcs.assert_true(nearest is km1.getClusters()[1])

    nearest = km2._nearest([1.,1.])
    introcs.assert_true(nearest is km2.getClusters()[0])

    nearest = km2._nearest([1.,10.])
    introcs.assert_true(nearest is km2.getClusters()[2])
    print('    Method Algorithm._nearest() looks okay')

    # Testing partition()
    # For this example points 0 and 3 are closer, as are 1 and 2
    km1._partition()
    introcs.assert_equals(set([0,3]), set(km1.getClusters()[0].getIndices()))
    introcs.assert_equals(set([1,2]), set(km1.getClusters()[1].getIndices()))
    # partition and repeat -- should not change clusters.
    km1._partition()
    introcs.assert_equals(set([0,3]), set(km1.getClusters()[0].getIndices()))
    introcs.assert_equals(set([1,2]), set(km1.getClusters()[1].getIndices()))

    # Reset the cluster centroids; now it changes
    cluster = km1.getClusters()
    cluster[0]._centroid = [5.0, 10.0]
    cluster[1]._centroid = [0.0, 2.0]
    km1._partition()
    introcs.assert_equals(set([2,3]), set(km1.getClusters()[0].getIndices()))
    introcs.assert_equals(set([0,1]), set(km1.getClusters()[1].getIndices()))

    # Try it on a file
    index1 = [2, 3, 5, 9, 11, 15, 16, 18, 19, 20, 22, 23, 29, 30, 32, 33, 37, 40, 41, 42,
              44, 45, 50, 60, 61, 62, 64, 69, 71, 73, 75, 76, 78, 80, 85, 88, 90, 94, 97]
    index2 = [0, 34, 8, 43, 66, 46, 77, 84, 54]
    index3 = [1, 4, 6, 7, 10, 12, 13, 14, 17, 21, 24, 25, 26, 27, 28, 31, 35, 36, 38, 39,
              47, 48, 49, 51, 52, 53, 55, 56, 57, 58, 59, 63, 65, 67, 68, 70, 72, 74, 79,
              81, 82, 83, 86, 87, 89, 91, 92, 93, 95, 96, 98, 99]

    file = os.path.join(os.path.split(__file__)[0],TEST_FILE)
    data = a6dataset.Dataset(4, tools.data_for_file(file))
    km3  = a6algorithm.Algorithm(data, 3, [23, 54, 36])
    km3._partition()
    introcs.assert_equals(set(index1), set(km3.getClusters()[0].getIndices()))
    introcs.assert_equals(set(index2), set(km3.getClusters()[1].getIndices()))
    introcs.assert_equals(set(index3), set(km3.getClusters()[2].getIndices()))

    print('    Method Algorithm._partition() looks okay')
    print('  Part B of class Algorithm appears correct')
    print()


def test_algorithm_c():
    """
    Tests Part C of the Algorithm class.
    """
    print('  Testing Part C of class Algorithm')
    items = [[0.,0.], [10.,1.], [10.,10.], [0.,9.]]
    dset = a6dataset.Dataset(2, items)
    km1 = a6algorithm.Algorithm(dset, 2, [0,2])
    km1._partition()

    # Test update()
    stable = km1._update()
    introcs.assert_float_lists_equal([0,4.5], km1.getClusters()[0].getCentroid())
    introcs.assert_float_lists_equal([10.0,5.5], km1.getClusters()[1].getCentroid())
    introcs.assert_false(stable)

    # updating again should not change anything, but should return stable
    stable = km1._update()
    introcs.assert_float_lists_equal([0,4.5], km1.getClusters()[0].getCentroid())
    introcs.assert_float_lists_equal([10.0,5.5], km1.getClusters()[1].getCentroid())
    introcs.assert_true(stable)

    print('    Method Algorithm._update() looks okay')

    # Now test the k-means process itself.

    # FOR ALL TEST CASES
    # Create and initialize a non-empty dataset
    items = [[0.5,0.5,0.5],[0.5,0.6,0.6],[0.6,0.5,0.6],[0.5,0.6,0.5],[0.5,0.4,0.5],[0.5,0.4,0.4]]
    dset = a6dataset.Dataset(3,items)

    # Create a clustering, providing non-random seed indices so the test is deterministic
    km2 = a6algorithm.Algorithm(dset, 2, [1, 3])

    # PRE-TEST: Check first cluster (should be okay if passed part D)
    cluster1 = km2.getClusters()[0]
    introcs.assert_float_lists_equal([0.5, 0.6, 0.6], cluster1.getCentroid())
    introcs.assert_equals(set([]), set(cluster1.getIndices()))

    # PRE-TEST: Check second cluster (should be okay if passed part D)
    cluster2 = km2.getClusters()[1]
    introcs.assert_float_lists_equal([0.5, 0.6, 0.5], cluster2.getCentroid())
    introcs.assert_equals(set([]), set(cluster2.getIndices()))

    # Make a fake cluster to test update_centroid() method
    clustertest = a6cluster.Cluster(dset, [0.5, 0.6, 0.6])
    for ind in [1, 2]:
        clustertest.addIndex(ind)

    # TEST CASE 1 (update)
    stable = clustertest.update()
    introcs.assert_float_lists_equal([0.55, 0.55, 0.6],clustertest.getCentroid())
    introcs.assert_false(stable) # Not yet stable

    # TEST CASE 2 (update)
    stable = clustertest.update()
    introcs.assert_float_lists_equal([0.55, 0.55, 0.6],clustertest.getCentroid())
    introcs.assert_true(stable) # Now it is stable

    # TEST CASE 3 (step)
    km2.step()

    # Check first cluster (WHICH HAS CHANGED!)
    cluster1 = km2.getClusters()[0]
    introcs.assert_float_lists_equal([0.55, 0.55, 0.6], cluster1.getCentroid())
    introcs.assert_equals(set([1, 2]), set(cluster1.getIndices()))

    # Check second cluster (WHICH HAS CHANGED!)
    cluster2 = km2.getClusters()[1]
    introcs.assert_float_lists_equal([0.5, 0.475, 0.475],cluster2.getCentroid())
    introcs.assert_equals(set([0, 3, 4, 5]), set(cluster2.getIndices()))

    # TEST CASE 3 (step)
    km2.step()

    # Check first cluster (WHICH HAS CHANGED!)
    cluster1 = km2.getClusters()[0]
    introcs.assert_float_lists_equal([8./15, 17./30, 17./30], cluster1.getCentroid())
    introcs.assert_equals(set([1, 2, 3]), set(cluster1.getIndices()))

    # Check second cluster (WHICH HAS CHANGED!)
    cluster2 = km2.getClusters()[1]
    introcs.assert_float_lists_equal([0.5, 13./30, 14./30],cluster2.getCentroid())
    introcs.assert_equals(set([0, 4, 5]), set(cluster2.getIndices()))

    # Try it on a file
    file = os.path.join(os.path.split(__file__)[0],TEST_FILE)
    data = a6dataset.Dataset(4, tools.data_for_file(file))
    km3  = a6algorithm.Algorithm(data, 3, [23, 54, 36])
    km3.step()

    # The actual results
    cluster0 = km3.getClusters()[0]
    cluster1 = km3.getClusters()[1]
    cluster2 = km3.getClusters()[2]

    # The "correct" answers
    contents0 = [[0.88, 0.84, 0.8, 0.3], [0.02, 0.67, 0.75, 0.61], [0.2, 0.54, 0.73, 0.85],
                 [0.62, 0.75, 0.65, 0.43], [0.35, 0.63, 0.65, 0.12], [0.61, 0.85, 0.81, 0.44],
                 [0.95, 0.94, 0.98, 0.69], [0.04, 0.69, 0.38, 0.39], [0.04, 0.52, 0.99, 0.75],
                 [0.28, 0.91, 0.63, 0.08], [0.14, 0.55, 0.67, 0.63], [0.38, 0.94, 0.53, 0.07],
                 [0.08, 0.62, 0.32, 0.27], [0.69, 0.82, 0.75, 0.65], [0.84, 0.89, 0.91, 0.38],
                 [0.22, 0.88, 0.39, 0.33], [0.39, 0.38, 0.85, 0.32], [0.26, 0.39, 0.95, 0.63],
                 [0.15, 0.87, 0.62, 0.22], [0.65, 0.81, 0.69, 0.55], [0.27, 0.63, 0.69, 0.39],
                 [0.35, 0.7, 0.41, 0.15], [0.2, 0.48, 0.98, 0.84], [0.76, 0.86, 0.74, 0.61],
                 [0.27, 0.65, 0.52, 0.28], [0.86, 0.91, 0.88, 0.62], [0.1, 0.79, 0.5, 0.12],
                 [0.09, 0.85, 0.55, 0.21], [0.79, 0.94, 0.83, 0.48], [0.73, 0.92, 0.74, 0.39],
                 [0.31, 0.5, 0.87, 0.85], [0.39, 0.9, 0.52, 0.26], [0.46, 0.35, 0.96, 0.05],
                 [0.21, 0.62, 0.33, 0.09], [0.58, 0.37, 0.9, 0.08], [0.54, 0.92, 0.36, 0.35],
                 [0.36, 0.64, 0.57, 0.26], [0.09, 0.47, 0.63, 0.8], [0.4, 0.69, 0.74, 0.7]]
    contents1 = [[0.32, 0.87, 0.14, 0.68], [0.87, 0.99, 0.2, 0.8], [0.86, 0.86, 0.32, 0.88],
                 [0.81, 0.66, 0.26, 0.82], [0.91, 0.98, 0.61, 0.58], [0.84, 0.88, 0.04, 0.86],
                 [0.8, 0.62, 0.09, 0.65], [0.72, 0.88, 0.02, 0.95], [0.88, 0.96, 0.09, 0.88]]
    contents2 = [[0.4, 0.21, 0.78, 0.68], [0.54, 0.06, 0.81, 0.98], [0.73, 0.31, 0.15, 0.08],
                 [0.81, 0.69, 0.65, 0.65], [0.14, 0.31, 0.86, 0.74], [0.77, 0.45, 0.31, 0.31],
                 [0.39, 0.14, 0.99, 0.24], [0.23, 0.32, 0.7, 0.75], [0.65, 0.05, 0.39, 0.49],
                 [0.96, 0.09, 0.49, 0.3], [0.86, 0.03, 0.3, 0.39], [0.5, 0.2, 0.69, 0.95],
                 [0.79, 0.09, 0.41, 0.69], [0.4, 0.3, 0.78, 0.74], [0.65, 0.24, 0.63, 0.27],
                 [0.35, 0.3, 0.94, 0.92], [0.71, 0.78, 0.64, 0.57], [0.8, 0.4, 0.23, 0.33],
                 [0.38, 0.07, 0.82, 0.01], [0.66, 0.09, 0.69, 0.46], [0.54, 0.06, 0.74, 0.86],
                 [0.95, 0.62, 0.28, 0.01], [0.35, 0.71, 0.01, 0.32], [0.62, 0.24, 0.77, 0.17],
                 [0.73, 0.65, 0.23, 0.02], [0.27, 0.38, 0.76, 0.63], [0.9, 0.63, 0.83, 0.6],
                 [0.7, 0.04, 0.7, 0.82], [0.95, 0.83, 0.64, 0.5], [0.41, 0.11, 0.61, 0.78],
                 [0.22, 0.44, 0.67, 0.99], [0.51, 0.05, 0.95, 0.66], [0.99, 0.68, 0.8, 0.42],
                 [0.72, 0.55, 0.1, 0.17], [0.44, 0.1, 0.61, 0.98], [0.31, 0.16, 0.95, 0.9],
                 [0.61, 0.42, 0.24, 0.33], [0.89, 0.72, 0.78, 0.38], [0.5, 0.09, 0.84, 0.78],
                 [0.62, 0.01, 0.88, 0.1], [0.44, 0.28, 0.88, 0.99], [0.57, 0.23, 0.6, 0.85],
                 [0.9, 0.05, 0.34, 0.41], [0.9, 0.41, 0.27, 0.36], [0.67, 0.32, 0.66, 0.2],
                 [0.72, 0.14, 0.63, 0.37], [0.39, 0.08, 0.77, 0.96], [0.9, 0.7, 0.74, 0.63],
                 [0.63, 0.05, 0.52, 0.63], [0.62, 0.27, 0.67, 0.77], [0.35, 0.04, 0.85, 0.86],
                 [0.36, 0.34, 0.75, 0.37]]
    centroid0 = [0.3987179487179487, 0.7097435897435899, 0.6864102564102561, 0.4164102564102565]
    centroid1 = [0.7788888888888889, 0.8555555555555555, 0.19666666666666668, 0.788888888888889]
    centroid2 = [0.6038461538461538, 0.29865384615384616, 0.6217307692307692, 0.5455769230769231]

    introcs.assert_float_lists_equal(centroid0,cluster0.getCentroid())
    introcs.assert_float_lists_equal(centroid1,cluster1.getCentroid())
    introcs.assert_float_lists_equal(centroid2,cluster2.getCentroid())
    assert_point_sets_equal(contents0,cluster0.getContents())
    assert_point_sets_equal(contents1,cluster1.getContents())
    assert_point_sets_equal(contents2,cluster2.getContents())


    print('    Method Algorithm.step looks okay')
    print('  Part C of class Algorithm appears correct')
    print()


def test_algorithm_d():
    """
    Tests Part D of the Algorithm class.
    """
    print('  Testing Part D of class Algorithm')
    items = [[0.5,0.5,0.5],[0.5,0.6,0.6],[0.6,0.5,0.6],[0.5,0.6,0.5],[0.5,0.4,0.5],[0.5,0.4,0.4]]
    dset = a6dataset.Dataset(3,items)

    # Try the same test case straight from the top using perform_k_means
    km1 = a6algorithm.Algorithm(dset, 2, [1, 3])
    km1.run(10)

    # Check first cluster
    cluster1 = km1.getClusters()[0]
    introcs.assert_float_lists_equal([8./15, 17./30, 17./30], cluster1.getCentroid())
    introcs.assert_equals(set([1, 2, 3]), set(cluster1.getIndices()))

    # Check second cluster
    cluster2 = km1.getClusters()[1]
    introcs.assert_float_lists_equal([0.5, 13./30, 14./30],cluster2.getCentroid())
    introcs.assert_equals(set([0, 4, 5]), set(cluster2.getIndices()))
    print('    Method run looks okay')

    # Test on a real world data set
    file = os.path.join(os.path.split(__file__)[0],TEST_FILE)
    data = a6dataset.Dataset(4, tools.data_for_file(file))
    km2  = a6algorithm.Algorithm(data, 3, [23, 54, 36])
    km2.run(20)

    # The actual results
    cluster0 = km2.getClusters()[0]
    cluster1 = km2.getClusters()[1]
    cluster2 = km2.getClusters()[2]

    # The "correct" answers
    contents0 = [[0.88, 0.84, 0.8, 0.3], [0.02, 0.67, 0.75, 0.61], [0.81, 0.69, 0.65, 0.65],
                 [0.62, 0.75, 0.65, 0.43], [0.35, 0.63, 0.65, 0.12], [0.61, 0.85, 0.81, 0.44],
                 [0.95, 0.94, 0.98, 0.69], [0.04, 0.69, 0.38, 0.39], [0.28, 0.91, 0.63, 0.08],
                 [0.38, 0.94, 0.53, 0.07], [0.08, 0.62, 0.32, 0.27], [0.69, 0.82, 0.75, 0.65],
                 [0.84, 0.89, 0.91, 0.38], [0.22, 0.88, 0.39, 0.33], [0.71, 0.78, 0.64, 0.57],
                 [0.15, 0.87, 0.62, 0.22], [0.65, 0.81, 0.69, 0.55], [0.27, 0.63, 0.69, 0.39],
                 [0.35, 0.7, 0.41, 0.15], [0.91, 0.98, 0.61, 0.58], [0.9, 0.63, 0.83, 0.6],
                 [0.95, 0.83, 0.64, 0.5], [0.76, 0.86, 0.74, 0.61], [0.27, 0.65, 0.52, 0.28],
                 [0.86, 0.91, 0.88, 0.62], [0.1, 0.79, 0.5, 0.12], [0.99, 0.68, 0.8, 0.42],
                 [0.09, 0.85, 0.55, 0.21], [0.79, 0.94, 0.83, 0.48], [0.73, 0.92, 0.74, 0.39],
                 [0.89, 0.72, 0.78, 0.38], [0.39, 0.9, 0.52, 0.26], [0.46, 0.35, 0.96, 0.05],
                 [0.21, 0.62, 0.33, 0.09], [0.58, 0.37, 0.9, 0.08], [0.54, 0.92, 0.36, 0.35],
                 [0.67, 0.32, 0.66, 0.2], [0.36, 0.64, 0.57, 0.26], [0.9, 0.7, 0.74, 0.63],
                 [0.4, 0.69, 0.74, 0.7]]
    contents1 = [[0.32, 0.87, 0.14, 0.68], [0.73, 0.31, 0.15, 0.08], [0.87, 0.99, 0.2, 0.8],
                 [0.77, 0.45, 0.31, 0.31], [0.96, 0.09, 0.49, 0.3], [0.86, 0.03, 0.3, 0.39],
                 [0.86, 0.86, 0.32, 0.88], [0.8, 0.4, 0.23, 0.33], [0.81, 0.66, 0.26, 0.82],
                 [0.95, 0.62, 0.28, 0.01], [0.35, 0.71, 0.01, 0.32], [0.73, 0.65, 0.23, 0.02],
                 [0.84, 0.88, 0.04, 0.86], [0.8, 0.62, 0.09, 0.65], [0.72, 0.55, 0.1, 0.17],
                 [0.61, 0.42, 0.24, 0.33], [0.72, 0.88, 0.02, 0.95], [0.88, 0.96, 0.09, 0.88],
                 [0.9, 0.05, 0.34, 0.41], [0.9, 0.41, 0.27, 0.36]]
    contents2 = [[0.4, 0.21, 0.78, 0.68], [0.54, 0.06, 0.81, 0.98], [0.2, 0.54, 0.73, 0.85],
                 [0.14, 0.31, 0.86, 0.74], [0.39, 0.14, 0.99, 0.24], [0.23, 0.32, 0.7, 0.75],
                 [0.65, 0.05, 0.39, 0.49], [0.04, 0.52, 0.99, 0.75], [0.14, 0.55, 0.67, 0.63],
                 [0.5, 0.2, 0.69, 0.95], [0.79, 0.09, 0.41, 0.69], [0.4, 0.3, 0.78, 0.74],
                 [0.65, 0.24, 0.63, 0.27], [0.35, 0.3, 0.94, 0.92], [0.39, 0.38, 0.85, 0.32],
                 [0.38, 0.07, 0.82, 0.01], [0.66, 0.09, 0.69, 0.46], [0.26, 0.39, 0.95, 0.63],
                 [0.54, 0.06, 0.74, 0.86], [0.2, 0.48, 0.98, 0.84], [0.62, 0.24, 0.77, 0.17],
                 [0.27, 0.38, 0.76, 0.63], [0.7, 0.04, 0.7, 0.82], [0.41, 0.11, 0.61, 0.78],
                 [0.22, 0.44, 0.67, 0.99], [0.51, 0.05, 0.95, 0.66], [0.44, 0.1, 0.61, 0.98],
                 [0.31, 0.16, 0.95, 0.9], [0.31, 0.5, 0.87, 0.85], [0.5, 0.09, 0.84, 0.78],
                 [0.62, 0.01, 0.88, 0.1], [0.44, 0.28, 0.88, 0.99], [0.57, 0.23, 0.6, 0.85],
                 [0.72, 0.14, 0.63, 0.37], [0.39, 0.08, 0.77, 0.96], [0.09, 0.47, 0.63, 0.8],
                 [0.63, 0.05, 0.52, 0.63], [0.62, 0.27, 0.67, 0.77], [0.35, 0.04, 0.85, 0.86],
                 [0.36, 0.34, 0.75, 0.37]]
    centroid0 = [0.54125, 0.7545, 0.66125, 0.3775]
    centroid1 = [0.76900, 0.5705, 0.20550, 0.4775]
    centroid2 = [0.42325, 0.2330, 0.75775, 0.6765]

    introcs.assert_float_lists_equal(centroid0,cluster0.getCentroid())
    introcs.assert_float_lists_equal(centroid1,cluster1.getCentroid())
    introcs.assert_float_lists_equal(centroid2,cluster2.getCentroid())
    assert_point_sets_equal(contents0,cluster0.getContents())
    assert_point_sets_equal(contents1,cluster1.getContents())
    assert_point_sets_equal(contents2,cluster2.getContents())
    print('    File analysis test looks okay')
    print('  Part D of class Algorithm appears correct')
    print()


def test_all():
    """
    Invokes all tests
    """
    print('Starting unit test\n')
    test_dataset()
    test_cluster_a()
    test_cluster_b()
    test_algorithm_a()
    test_algorithm_b()
    test_algorithm_c()
    test_algorithm_d()
    print('All test cases passed!')