Skip to content

Commit c44ff24

Browse files
committed
Unit tests split up into multiple suites.
1 parent 4b55547 commit c44ff24

File tree

3 files changed

+181
-154
lines changed

3 files changed

+181
-154
lines changed
Lines changed: 4 additions & 154 deletions
Original file line numberDiff line numberDiff line change
@@ -15,35 +15,10 @@
1515
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
1616
#
1717

18-
from cluster import (HierarchicalClustering, KMeansClustering, ClusteringError)
19-
from difflib import SequenceMatcher
2018
import unittest
21-
try:
22-
import numpy
23-
NUMPY_AVAILABLE = True
24-
except:
25-
NUMPY_AVAILABLE = False
26-
27-
28-
def compare_list(x, y):
29-
"""
30-
Compare lists by content. Ordering does not matter.
31-
Returns True if both lists contain the same items (and are of identical
32-
length)
33-
"""
34-
35-
cmpx = [set(cluster) for cluster in x]
36-
cmpy = [set(cluster) for cluster in y]
37-
38-
all_ok = True
39-
40-
for cset in cmpx:
41-
all_ok &= cset in cmpy
42-
43-
for cset in cmpy:
44-
all_ok &= cset in cmpx
19+
from difflib import SequenceMatcher
4520

46-
return all_ok
21+
from cluster import HierarchicalClustering
4722

4823

4924
class HClusterSmallListTestCase(unittest.TestCase):
@@ -188,8 +163,8 @@ def testDataTypes(self):
188163
cl = HierarchicalClustering(self.__data, self.sim)
189164
for item in cl.getlevel(0.5):
190165
self.assertEqual(
191-
type(item), type([]),
192-
"Every item should be a list!")
166+
type(item), type([]),
167+
"Every item should be a list!")
193168

194169
def testCluster(self):
195170
"Basic Hierarchical clustering test with strings"
@@ -212,128 +187,3 @@ def testUnmodifiedData(self):
212187
new_data = []
213188
[new_data.extend(_) for _ in cl.getlevel(0.5)]
214189
self.assertEqual(sorted(new_data), sorted(self.__data))
215-
216-
217-
class KClusterSmallListTestCase(unittest.TestCase):
218-
219-
def testClusterLen1(self):
220-
"Testing that a search space of length 1 returns only one cluster"
221-
cl = KMeansClustering([876])
222-
self.assertEqual([876], cl.getclusters(2))
223-
self.assertEqual([876], cl.getclusters(5))
224-
225-
def testClusterLen0(self):
226-
"Testing if clustering an empty set, returns an empty set"
227-
cl = KMeansClustering([])
228-
self.assertEqual([], cl.getclusters(2))
229-
self.assertEqual([], cl.getclusters(7))
230-
231-
232-
class KCluster2DTestCase(unittest.TestCase):
233-
234-
def testClusterCount(self):
235-
"Test that asking for less than 2 clusters raises an error"
236-
cl = KMeansClustering([876, 123, 344, 676],
237-
distance=lambda x, y: abs(x - y))
238-
self.assertRaises(ClusteringError, cl.getclusters, 0)
239-
self.assertRaises(ClusteringError, cl.getclusters, 1)
240-
241-
def testNonsenseCluster(self):
242-
"""
243-
Test that asking for more clusters than data-items available raises an
244-
error
245-
"""
246-
cl = KMeansClustering([876, 123], distance=lambda x, y: abs(x - y))
247-
self.assertRaises(ClusteringError, cl.getclusters, 5)
248-
249-
def testUniformLength(self):
250-
"""
251-
Test if there is an item in the cluster that has a different
252-
cardinality
253-
"""
254-
data = [(1, 5), (2, 5), (2, 6), (3, 4), (3, 5), (3, 6, 7), (7, 3), (8,
255-
1), (8, 2), (8), (9, 2), (9, 3)]
256-
self.assertRaises(ValueError, KMeansClustering, data)
257-
258-
def testPointDoubling(self):
259-
"test for bug #1604868"
260-
data = [(18, 13), (15, 12), (17, 12), (18, 12), (19, 12), (16, 11),
261-
(18, 11), (19, 10), (0, 0), (1, 4), (1, 2), (2, 3), (4, 1),
262-
(4, 3), (5, 2), (6, 1)]
263-
cl = KMeansClustering(data)
264-
clusters = cl.getclusters(2)
265-
expected = [[(18, 13), (15, 12), (17, 12), (18, 12), (19, 12), (16,
266-
11), (18, 11), (19, 10)], [(0, 0), (1, 4), (1, 2), (2, 3), (4, 1),
267-
(5, 2), (6, 1), (4, 3)]]
268-
self.assertTrue(compare_list(
269-
clusters,
270-
expected),
271-
"Elements differ!\n%s\n%s" % (clusters, expected))
272-
273-
def testClustering(self):
274-
"Basic clustering test"
275-
data = [(8, 2), (7, 3), (2, 6), (3, 5), (3, 6), (1, 5), (8, 1), (3,
276-
4), (8, 3), (9, 2), (2, 5), (9, 3)]
277-
cl = KMeansClustering(data)
278-
self.assertEqual(
279-
cl.getclusters(2),
280-
[[(8, 2), (8, 1), (8, 3), (7, 3), (9, 2), (9, 3)],
281-
[(3, 5), (1, 5), (3, 4), (2, 6), (2, 5), (3, 6)]])
282-
283-
def testUnmodifiedData(self):
284-
"Basic clustering test"
285-
data = [(8, 2), (7, 3), (2, 6), (3, 5), (3, 6), (1, 5), (8, 1), (3,
286-
4), (8, 3), (9, 2), (2, 5), (9, 3)]
287-
cl = KMeansClustering(data)
288-
289-
new_data = []
290-
[new_data.extend(_) for _ in cl.getclusters(2)]
291-
self.assertEqual(sorted(new_data), sorted(data))
292-
293-
294-
class KClusterSFBugs(unittest.TestCase):
295-
296-
def testLostFunctionReference(self):
297-
"test for bug #1727558"
298-
cl = KMeansClustering([(1, 1), (20, 40), (20, 41)],
299-
lambda x, y: x + y)
300-
clusters = cl.getclusters(3)
301-
expected = [(1, 1), (20, 40), (20, 41)]
302-
self.assertTrue(compare_list(
303-
clusters,
304-
expected),
305-
"Elements differ!\n%s\n%s" % (clusters, expected))
306-
307-
def testMultidimArray(self):
308-
from random import random
309-
data = []
310-
for _ in range(200):
311-
data.append([random(), random()])
312-
cl = KMeansClustering(data, lambda p0, p1: (
313-
p0[0] - p1[0]) ** 2 + (p0[1] - p1[1]) ** 2)
314-
cl.getclusters(10)
315-
316-
317-
@unittest.skipUnless(NUMPY_AVAILABLE,
318-
'numpy not available. Associated test will not be loaded!')
319-
class NumpyTests(unittest.TestCase):
320-
321-
def testNumpyRandom(self):
322-
data = numpy.random.rand(500, 2)
323-
cl = KMeansClustering(data, lambda p0, p1: (
324-
p0[0] - p1[0]) ** 2 + (p0[1] - p1[1]) ** 2, numpy.array_equal)
325-
cl.getclusters(10)
326-
327-
328-
if __name__ == '__main__':
329-
suite = unittest.TestSuite((
330-
unittest.makeSuite(HClusterIntegerTestCase),
331-
unittest.makeSuite(HClusterSmallListTestCase),
332-
unittest.makeSuite(HClusterStringTestCase),
333-
unittest.makeSuite(KCluster2DTestCase),
334-
unittest.makeSuite(KClusterSFBugs),
335-
unittest.makeSuite(KClusterSmallListTestCase),
336-
unittest.makeSuite(NumpyTests),
337-
))
338-
339-
unittest.TextTestRunner(verbosity=2).run(suite)

cluster/test/test_kmeans.py

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
#
2+
# This is part of "python-cluster". A library to group similar items together.
3+
# Copyright (C) 2006 Michel Albert
4+
#
5+
# This library is free software; you can redistribute it and/or modify it under
6+
# the terms of the GNU Lesser General Public License as published by the Free
7+
# Software Foundation; either version 2.1 of the License, or (at your option)
8+
# any later version.
9+
# This library is distributed in the hope that it will be useful, but WITHOUT
10+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11+
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
12+
# details.
13+
# You should have received a copy of the GNU Lesser General Public License
14+
# along with this library; if not, write to the Free Software Foundation, Inc.,
15+
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16+
#
17+
18+
from cluster import (KMeansClustering, ClusteringError)
19+
import unittest
20+
21+
22+
def compare_list(x, y):
23+
"""
24+
Compare lists by content. Ordering does not matter.
25+
Returns True if both lists contain the same items (and are of identical
26+
length)
27+
"""
28+
29+
cmpx = [set(cluster) for cluster in x]
30+
cmpy = [set(cluster) for cluster in y]
31+
32+
all_ok = True
33+
34+
for cset in cmpx:
35+
all_ok &= cset in cmpy
36+
37+
for cset in cmpy:
38+
all_ok &= cset in cmpx
39+
40+
return all_ok
41+
42+
43+
class KClusterSmallListTestCase(unittest.TestCase):
44+
45+
def testClusterLen1(self):
46+
"Testing that a search space of length 1 returns only one cluster"
47+
cl = KMeansClustering([876])
48+
self.assertEqual([876], cl.getclusters(2))
49+
self.assertEqual([876], cl.getclusters(5))
50+
51+
def testClusterLen0(self):
52+
"Testing if clustering an empty set returns an empty set"
53+
cl = KMeansClustering([])
54+
self.assertEqual([], cl.getclusters(2))
55+
self.assertEqual([], cl.getclusters(7))
56+
57+
58+
class KCluster2DTestCase(unittest.TestCase):
59+
60+
def testClusterCount(self):
61+
"Test that asking for less than 2 clusters raises an error"
62+
cl = KMeansClustering([876, 123, 344, 676],
63+
distance=lambda x, y: abs(x - y))
64+
self.assertRaises(ClusteringError, cl.getclusters, 0)
65+
self.assertRaises(ClusteringError, cl.getclusters, 1)
66+
67+
def testNonsenseCluster(self):
68+
"""
69+
Test that asking for more clusters than data-items available raises an
70+
error
71+
"""
72+
cl = KMeansClustering([876, 123], distance=lambda x, y: abs(x - y))
73+
self.assertRaises(ClusteringError, cl.getclusters, 5)
74+
75+
def testUniformLength(self):
76+
"""
77+
Test if there is an item in the cluster that has a different
78+
cardinality
79+
"""
80+
data = [(1, 5), (2, 5), (2, 6), (3, 4), (3, 5), (3, 6, 7), (7, 3),
81+
(8, 1), (8, 2), (8), (9, 2), (9, 3)]
82+
self.assertRaises(ValueError, KMeansClustering, data)
83+
84+
def testPointDoubling(self):
85+
"test for bug #1604868"
86+
data = [(18, 13), (15, 12), (17, 12), (18, 12), (19, 12), (16, 11),
87+
(18, 11), (19, 10), (0, 0), (1, 4), (1, 2), (2, 3), (4, 1),
88+
(4, 3), (5, 2), (6, 1)]
89+
cl = KMeansClustering(data)
90+
clusters = cl.getclusters(2)
91+
expected = [[(18, 13), (15, 12), (17, 12), (18, 12), (19, 12),
92+
(16, 11), (18, 11), (19, 10)],
93+
[(0, 0), (1, 4), (1, 2), (2, 3), (4, 1),
94+
(5, 2), (6, 1), (4, 3)]]
95+
self.assertTrue(compare_list(
96+
clusters,
97+
expected),
98+
"Elements differ!\n%s\n%s" % (clusters, expected))
99+
100+
def testClustering(self):
101+
"Basic clustering test"
102+
data = [(8, 2), (7, 3), (2, 6), (3, 5), (3, 6), (1, 5), (8, 1),
103+
(3, 4), (8, 3), (9, 2), (2, 5), (9, 3)]
104+
cl = KMeansClustering(data)
105+
self.assertEqual(
106+
cl.getclusters(2),
107+
[[(8, 2), (8, 1), (8, 3), (7, 3), (9, 2), (9, 3)],
108+
[(3, 5), (1, 5), (3, 4), (2, 6), (2, 5), (3, 6)]])
109+
110+
def testUnmodifiedData(self):
111+
"Test that the clusters together contain exactly the original data items"
112+
data = [(8, 2), (7, 3), (2, 6), (3, 5), (3, 6), (1, 5), (8, 1),
113+
(3, 4), (8, 3), (9, 2), (2, 5), (9, 3)]
114+
cl = KMeansClustering(data)
115+
116+
new_data = []
117+
[new_data.extend(_) for _ in cl.getclusters(2)]
118+
self.assertEqual(sorted(new_data), sorted(data))
119+
120+
121+
class KClusterSFBugs(unittest.TestCase):
122+
123+
def testLostFunctionReference(self):
124+
"test for bug #1727558"
125+
cl = KMeansClustering([(1, 1), (20, 40), (20, 41)],
126+
lambda x, y: x + y)
127+
clusters = cl.getclusters(3)
128+
expected = [(1, 1), (20, 40), (20, 41)]
129+
self.assertTrue(compare_list(
130+
clusters,
131+
expected),
132+
"Elements differ!\n%s\n%s" % (clusters, expected))
133+
134+
def testMultidimArray(self):
135+
from random import random
136+
data = []
137+
for _ in range(200):
138+
data.append([random(), random()])
139+
cl = KMeansClustering(data, lambda p0, p1: (
140+
p0[0] - p1[0]) ** 2 + (p0[1] - p1[1]) ** 2)
141+
cl.getclusters(10)

cluster/test/test_numpy.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
#
2+
# This is part of "python-cluster". A library to group similar items together.
3+
# Copyright (C) 2006 Michel Albert
4+
#
5+
# This library is free software; you can redistribute it and/or modify it under
6+
# the terms of the GNU Lesser General Public License as published by the Free
7+
# Software Foundation; either version 2.1 of the License, or (at your option)
8+
# any later version.
9+
# This library is distributed in the hope that it will be useful, but WITHOUT
10+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11+
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
12+
# details.
13+
# You should have received a copy of the GNU Lesser General Public License
14+
# along with this library; if not, write to the Free Software Foundation, Inc.,
15+
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16+
#
17+
18+
import unittest
19+
20+
from cluster import KMeansClustering
21+
try:
22+
import numpy
23+
NUMPY_AVAILABLE = True
24+
except:
25+
NUMPY_AVAILABLE = False
26+
27+
28+
@unittest.skipUnless(NUMPY_AVAILABLE,
29+
'numpy not available. Associated test will not be loaded!')
30+
class NumpyTests(unittest.TestCase):
31+
32+
def testNumpyRandom(self):
33+
data = numpy.random.rand(500, 2)
34+
cl = KMeansClustering(data, lambda p0, p1: (
35+
p0[0] - p1[0]) ** 2 + (p0[1] - p1[1]) ** 2, numpy.array_equal)
36+
cl.getclusters(10)

0 commit comments

Comments
 (0)