|
#
# This is part of "python-cluster". A library to group similar items together.
# Copyright (C) 2006 Michel Albert
#
# This library is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
|
20 | 20 | import unittest |
21 | 21 |
|
def compare_list(x, y):
    """
    Compare two lists of clusters by content. Ordering does not matter.

    Every cluster (inner iterable) is reduced to a ``set`` of its items, so
    neither the order of the clusters nor the order of the items inside a
    cluster is significant. Because of the set conversion, repeated items
    inside a single cluster are collapsed before comparing.

    Returns True if both lists contain the same clusters and are of identical
    length, False otherwise.
    """
    cmpx = [set(cluster) for cluster in x]
    cmpy = [set(cluster) for cluster in y]

    # The membership checks below cannot notice a cluster that is merely
    # repeated on one side, so the length is compared explicitly.
    if len(cmpx) != len(cmpy):
        return False

    # Sets are unhashable, hence the list membership tests; both directions
    # are checked so that neither side may hold a cluster the other lacks.
    return (all(cset in cmpy for cset in cmpx)
            and all(cset in cmpx for cset in cmpy))
41 | 41 |
|
42 | 42 |
|
class HClusterSmallListTestCase(unittest.TestCase):
    " Test for Bug #1516204 "

    # Both tests cluster with the absolute difference as distance function
    # and query the result at level 40 without calling cluster() first.

    def testClusterLen1(self):
        "Testing if hierarchical clustering a set of length 1 returns a set of length 1"
        cl = HierarchicalClustering([876], lambda x,y: abs(x-y))
        # A single input item is expected back as a one-element list.
        self.assertEqual([876], cl.getlevel(40))

    def testClusterLen0(self):
        "Testing if hierarchical clustering an empty list returns an empty list"
        cl = HierarchicalClustering([], lambda x,y: abs(x-y))
        # Empty input is expected to yield an empty result.
        self.assertEqual([], cl.getlevel(40))
55 | 55 |
|
class HClusterIntegerTestCase(unittest.TestCase):
    # Hierarchical clustering of plain integers, using the absolute
    # difference between two values as the distance function.

    def setUp(self):
        # Fixture: 20 integers; the value 365 appears twice.
        self.__data = [791, 956, 676, 124, 564, 84, 24, 365, 594, 940, 398,
                       971, 131, 365, 542, 336, 518, 835, 134, 391]

    def testCluster(self):
        "Basic Hierarchical Clustering test with integers"
        cl = HierarchicalClustering(self.__data, lambda x,y: abs(x-y))
        cl.cluster()
        # The comparison is an exact list equality, so both the order of the
        # clusters and the order of the items inside each cluster matter.
        # NOTE(review): the expected third cluster lists 365 three times while
        # the fixture contains it twice, and 594 from the fixture appears in
        # no expected cluster -- confirm against the library's actual output.
        self.assertEqual([
            [24],
            [84, 124, 131, 134],
            [336, 365, 365, 365, 398, 391],
            [940, 956, 971],
            [791],
            [835],
            [676],
            [518, 564, 542]
            ],
            cl.getlevel(40))
77 | 77 |
|
class HClusterStringTestCase(unittest.TestCase):
    # Hierarchical clustering of words, with a difflib-based distance.

    def sim(self, x, y):
        # Distance between two strings: 1 minus the SequenceMatcher ratio, so
        # identical strings give 0. The characters '.', ' ' and '-' are
        # passed to SequenceMatcher as junk.
        sm = SequenceMatcher(lambda x: x in ". -", x, y)
        return 1-sm.ratio()

    def setUp(self):
        # Fixture: the whitespace-separated words of a Lorem-ipsum snippet;
        # punctuation stays attached to the words.
        self.__data = "Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Ut elit. Phasellus consequat ultricies mi. Sed congue leo at neque. Nullam.".split()

    def testDataTypes(self):
        "Test for bug #?"
        cl = HierarchicalClustering(self.__data, self.sim)
        # Every cluster returned for the level must be exactly a list
        # (exact type comparison, subclasses would not pass).
        for item in cl.getlevel(0.5):
            self.assertEqual(
                type(item), list,
                "Every item should be a list!")

    def testCluster(self):
        "Basic Hierachical clustering test with strings"
        cl = HierarchicalClustering(self.__data, self.sim)
        # Exact list equality: cluster order and item order both matter.
        self.assertEqual([
            ['Nullam.'],
            ['Sed'],
            ['mi.'],
            ['ultricies'],
            ['Phasellus'],
            ['amet,', 'at'],
            ['sit', 'elit.', 'elit.', 'elit.'],
            ['leo', 'Lorem', 'dolor'],
            ['neque.', 'congue', 'consectetuer', 'consequat'],
            ['ipsum'],
            ['adipiscing']
            ],
            cl.getlevel(0.5)
            )
113 | 113 |
|
class KClusterSmallListTestCase(unittest.TestCase):
    # K-means behaviour on degenerate inputs (one item, no items). In both
    # tests the number of requested clusters exceeds the number of items.

    def testClusterLen1(self):
        "Testing that a search space of length 1 returns only one cluster"
        cl = KMeansClustering([876])
        # The single item is expected back unchanged, whatever count is asked.
        self.assertEqual([876], cl.getclusters(2))
        self.assertEqual([876], cl.getclusters(5))

    def testClusterLen0(self):
        "Testing if clustering an empty set, returns an empty set"
        cl = KMeansClustering([])
        # Empty input is expected to yield an empty result.
        self.assertEqual([], cl.getclusters(2))
        self.assertEqual([], cl.getclusters(7))
127 | 127 |
|
class KCluster2DTestCase(unittest.TestCase):
    # K-means tests: argument validation plus clustering of 2D points.

    def testClusterCount(self):
        "Test that asking for less than 2 clusters raises an error"
        cl = KMeansClustering([876, 123, 344, 676], distance=lambda x,y: abs(x-y))
        self.assertRaises(ClusteringError, cl.getclusters, 0)
        self.assertRaises(ClusteringError, cl.getclusters, 1)

    def testNonsenseCluster(self):
        "Test that asking for more clusters than data-items available raises an error"
        cl = KMeansClustering([876, 123], distance=lambda x,y: abs(x-y))
        self.assertRaises(ClusteringError, cl.getclusters, 5)

    def testUniformLength(self):
        "Test if there is an item in the cluster that has a different cardinality"
        # (3,6,7) has three components instead of two.
        # NOTE(review): (8) is a plain int, not a one-element tuple (that
        # would be spelled (8,)) -- confirm this is the intended input.
        data = [ (1,5), (2,5), (2,6), (3,4), (3,5), (3,6,7), (7,3), (8,1), (8,2), (8), (9,2), (9,3) ]
        # The constructor itself is expected to raise.
        self.assertRaises(ValueError, KMeansClustering, data)

    def testPointDoubling(self):
        "test for bug #1604868"
        data = [ (18,13), (15, 12), (17,12), (18,12), (19,12), (16,11), (18, 11),
                 (19,10), (0,0), (1, 4), (1,2), (2,3), (4,1), (4,3), (5,2), (6,1)]
        cl = KMeansClustering(data)
        clusters = cl.getclusters(2)
        # Expected split: the first eight points versus the last eight.
        expected = [[(18,13), (15, 12), (17,12), (18,12), (19,12), (16,11), (18, 11), (19,10)],
                    [(0,0), (1, 4), (1,2), (2,3), (4,1), (5,2), (6,1), (4,3)]]
        # Order-insensitive comparison through the module-level helper.
        self.assertTrue( compare_list(
            clusters,
            expected ),
            "Elements differ!\n%s\n%s" % (clusters, expected))

    def testClustering(self):
        "Basic clustering test"
        data = [ (8,2), (7,3), (2,6), (3,5), (3,6), (1,5), (8,1), (3,4), (8,3), (9,2), (2,5), (9,3) ]
        cl = KMeansClustering(data)
        # NOTE(review): `clusters` is never read; the assertion below calls
        # getclusters(2) a second time. The call sequence is kept as is --
        # confirm whether the double call is intentional.
        clusters = cl.getclusters(2)
        # Exact equality: cluster order and point order both matter here.
        self.assertEqual(
            cl.getclusters(2),
            [[(8, 2), (8, 1), (8, 3), (7, 3), (9, 2), (9, 3)],
             [(3, 5), (1, 5), (3, 4), (2, 6), (2, 5), (3, 6)]])
168 | 168 |
|
class KClusterSFBugs(unittest.TestCase):
    # Regression tests for K-means bugs, referenced by bug number in the
    # test docstrings.

    def testLostFunctionReference(self):
        "test for bug #1727558"
        # A custom callable is passed as second positional argument; as many
        # clusters are requested as there are data points.
        cl = KMeansClustering([(1,1), (20,40), (20,41)], lambda x,y:x+y)
        clusters = cl.getclusters(3)
        # `expected` is a flat list of points; compare_list converts every
        # entry of both arguments to a set before comparing.
        expected = [(1,1), (20,40), (20,41)]
        self.assertTrue( compare_list(
            clusters,
            expected ),
            "Elements differ!\n%s\n%s" % (clusters, expected))
180 | 180 |
|
# Collect the tests of every test case class in this module into one suite and
# run it verbosely. This executes at module level, i.e. whenever the file is
# run or imported.
# unittest.makeSuite() is deprecated since Python 3.11 and removed in 3.13;
# defaultTestLoader.loadTestsFromTestCase() is the documented replacement and
# collects the same "test*" methods.
unittest.TextTestRunner(verbosity=2).run(
    unittest.TestSuite((
        unittest.defaultTestLoader.loadTestsFromTestCase(HClusterSmallListTestCase),
        unittest.defaultTestLoader.loadTestsFromTestCase(HClusterIntegerTestCase),
        unittest.defaultTestLoader.loadTestsFromTestCase(HClusterStringTestCase),
        unittest.defaultTestLoader.loadTestsFromTestCase(KClusterSmallListTestCase),
        unittest.defaultTestLoader.loadTestsFromTestCase(KCluster2DTestCase),
        unittest.defaultTestLoader.loadTestsFromTestCase(KClusterSFBugs),
    ))
)
0 commit comments