1515# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
1616#
1717
18- from cluster import (HierarchicalClustering , KMeansClustering , ClusteringError )
19- from difflib import SequenceMatcher
2018import unittest
# numpy is an optional dependency. Probe for it once at import time and
# record the outcome so numpy-specific tests can be skipped when it is
# missing.
try:
    import numpy
    NUMPY_AVAILABLE = True
except ImportError:
    # Only a failed import means "not installed". A bare ``except`` would
    # also swallow KeyboardInterrupt and SystemExit.
    NUMPY_AVAILABLE = False
26-
27-
def compare_list(x, y):
    """
    Compare two lists of clusters by content. Ordering does not matter.

    Every cluster is compared as a set, so neither the order of the clusters
    nor the order of the items inside a cluster is significant, and repeated
    items inside a single cluster are ignored.

    Returns True if both lists contain the same clusters the same number of
    times (and are therefore of identical length), False otherwise.

    The items inside each cluster must be hashable.
    """
    # Local import keeps this helper self-contained.
    from collections import Counter

    # A plain mutual-membership check would accept [[1], [1]] == [[1]];
    # counting frozensets enforces the "identical length" promise as well.
    count_x = Counter(frozenset(cluster) for cluster in x)
    count_y = Counter(frozenset(cluster) for cluster in y)
    return count_x == count_y
21+ from cluster import HierarchicalClustering
4722
4823
4924class HClusterSmallListTestCase (unittest .TestCase ):
@@ -188,8 +163,8 @@ def testDataTypes(self):
188163 cl = HierarchicalClustering (self .__data , self .sim )
189164 for item in cl .getlevel (0.5 ):
190165 self .assertEqual (
191- type (item ), type ([]),
192- "Every item should be a list!" )
166+ type (item ), type ([]),
167+ "Every item should be a list!" )
193168
194169 def testCluster (self ):
195170 "Basic Hierachical clustering test with strings"
@@ -212,128 +187,3 @@ def testUnmodifiedData(self):
212187 new_data = []
213188 [new_data .extend (_ ) for _ in cl .getlevel (0.5 )]
214189 self .assertEqual (sorted (new_data ), sorted (self .__data ))
215-
216-
class KClusterSmallListTestCase(unittest.TestCase):
    """K-means clustering on degenerate inputs (one item, no items)."""

    def testClusterLen1(self):
        "Testing that a search space of length 1 returns only one cluster"
        clustering = KMeansClustering([876])
        # The requested cluster count must not matter for a single item.
        for cluster_count in (2, 5):
            self.assertEqual([876], clustering.getclusters(cluster_count))

    def testClusterLen0(self):
        "Testing if clustering an empty set, returns an empty set"
        clustering = KMeansClustering([])
        # Likewise, empty input yields an empty result for any count.
        for cluster_count in (2, 7):
            self.assertEqual([], clustering.getclusters(cluster_count))
230-
231-
class KCluster2DTestCase(unittest.TestCase):
    """K-means clustering tests on scalar and two-dimensional data."""

    def testClusterCount(self):
        "Test that asking for less than 2 clusters raises an error"
        cl = KMeansClustering([876, 123, 344, 676],
                              distance=lambda x, y: abs(x - y))
        self.assertRaises(ClusteringError, cl.getclusters, 0)
        self.assertRaises(ClusteringError, cl.getclusters, 1)

    def testNonsenseCluster(self):
        """
        Test that asking for more clusters than data-items available raises an
        error
        """
        cl = KMeansClustering([876, 123], distance=lambda x, y: abs(x - y))
        self.assertRaises(ClusteringError, cl.getclusters, 5)

    def testUniformLength(self):
        """
        Test if there is an item in the cluster that has a different
        cardinality
        """
        # (3, 6, 7) and (8,) deliberately break the two-element pattern.
        # Note the trailing comma: a bare ``(8)`` is the integer 8, not a
        # one-element tuple.
        data = [(1, 5), (2, 5), (2, 6), (3, 4), (3, 5), (3, 6, 7), (7, 3),
                (8, 1), (8, 2), (8,), (9, 2), (9, 3)]
        self.assertRaises(ValueError, KMeansClustering, data)

    def testPointDoubling(self):
        "test for bug #1604868"
        data = [(18, 13), (15, 12), (17, 12), (18, 12), (19, 12), (16, 11),
                (18, 11), (19, 10), (0, 0), (1, 4), (1, 2), (2, 3), (4, 1),
                (4, 3), (5, 2), (6, 1)]
        cl = KMeansClustering(data)
        clusters = cl.getclusters(2)
        expected = [
            [(18, 13), (15, 12), (17, 12), (18, 12), (19, 12), (16, 11),
             (18, 11), (19, 10)],
            [(0, 0), (1, 4), (1, 2), (2, 3), (4, 1), (5, 2), (6, 1),
             (4, 3)],
        ]
        # Order-insensitive comparison: only cluster membership matters.
        self.assertTrue(
            compare_list(clusters, expected),
            "Elements differ!\n %s\n %s" % (clusters, expected))

    def testClustering(self):
        "Basic clustering test"
        data = [(8, 2), (7, 3), (2, 6), (3, 5), (3, 6), (1, 5), (8, 1),
                (3, 4), (8, 3), (9, 2), (2, 5), (9, 3)]
        cl = KMeansClustering(data)
        self.assertEqual(
            cl.getclusters(2),
            [[(8, 2), (8, 1), (8, 3), (7, 3), (9, 2), (9, 3)],
             [(3, 5), (1, 5), (3, 4), (2, 6), (2, 5), (3, 6)]])

    def testUnmodifiedData(self):
        "Test that clustering neither loses, adds nor alters data items"
        data = [(8, 2), (7, 3), (2, 6), (3, 5), (3, 6), (1, 5), (8, 1),
                (3, 4), (8, 3), (9, 2), (2, 5), (9, 3)]
        cl = KMeansClustering(data)

        # Flatten the clusters back into one list; sorting both sides makes
        # the comparison independent of cluster assignment and ordering.
        new_data = []
        for cluster in cl.getclusters(2):
            new_data.extend(cluster)
        self.assertEqual(sorted(new_data), sorted(data))
292-
293-
class KClusterSFBugs(unittest.TestCase):
    """Regression tests for K-means bugs, identified by tracker number."""

    def testLostFunctionReference(self):
        "test for bug #1727558"
        clustering = KMeansClustering(
            [(1, 1), (20, 40), (20, 41)],
            lambda x, y: x + y)
        found = clustering.getclusters(3)
        wanted = [(1, 1), (20, 40), (20, 41)]
        same_content = compare_list(found, wanted)
        self.assertTrue(
            same_content,
            "Elements differ!\n %s\n %s" % (found, wanted))

    def testMultidimArray(self):
        from random import random

        def squared_distance(p0, p1):
            # Squared Euclidean distance between two 2-D points.
            return (p0[0] - p1[0]) ** 2 + (p0[1] - p1[1]) ** 2

        # 200 random 2-D points; the test only checks that clustering
        # completes without raising.
        points = [[random(), random()] for _ in range(200)]
        KMeansClustering(points, squared_distance).getclusters(10)
315-
316-
@unittest.skipUnless(
    NUMPY_AVAILABLE,
    'numpy not available. Associated test will not be loaded!')
class NumpyTests(unittest.TestCase):
    """K-means clustering on numpy arrays; skipped when numpy is missing."""

    def testNumpyRandom(self):
        def squared_distance(p0, p1):
            # Squared Euclidean distance between two 2-D points.
            return (p0[0] - p1[0]) ** 2 + (p0[1] - p1[1]) ** 2

        points = numpy.random.rand(500, 2)
        # NOTE(review): the third argument is presumably the item-equality
        # function -- confirm against KMeansClustering's signature.
        clustering = KMeansClustering(
            points, squared_distance, numpy.array_equal)
        # Smoke test: passes as long as clustering does not raise.
        clustering.getclusters(10)
326-
327-
if __name__ == '__main__':
    # Build the suite explicitly so the cases run in this fixed order.
    # ``unittest.makeSuite`` is deprecated since Python 3.11 and removed in
    # 3.13; ``loadTestsFromTestCase`` on the default loader is the
    # equivalent call.
    load = unittest.defaultTestLoader.loadTestsFromTestCase
    suite = unittest.TestSuite((
        load(HClusterIntegerTestCase),
        load(HClusterSmallListTestCase),
        load(HClusterStringTestCase),
        load(KCluster2DTestCase),
        load(KClusterSFBugs),
        load(KClusterSmallListTestCase),
        load(NumpyTests),
    ))

    unittest.TextTestRunner(verbosity=2).run(suite)
0 commit comments