Skip to content

Commit e73e590

Browse files
committed
Clustering: Additional tests for clustering methods
1 parent 45ba9c6 commit e73e590

File tree

4 files changed

+278
-28
lines changed

4 files changed

+278
-28
lines changed

Orange/tests/test_clustering_dbscan.py

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@
44
import unittest
55

66
import numpy as np
7+
from scipy.sparse import csc_matrix, csr_matrix
78

9+
from Orange.clustering import ClusteringModel
810
from Orange.data import Table
911
from Orange.clustering.dbscan import DBSCAN
1012

@@ -14,15 +16,75 @@ def setUp(self):
1416
self.iris = Table('iris')
1517
self.dbscan = DBSCAN()
1618

19+
def test_dbscan(self):
20+
c = self.dbscan(self.iris)
21+
# First 20 iris belong to one cluster
22+
self.assertEqual(np.ndarray, type(c))
23+
self.assertEqual(len(self.iris), len(c))
24+
self.assertEqual(1, len(set(c[:20].ravel())))
25+
1726
def test_dbscan_parameters(self):
1827
dbscan = DBSCAN(eps=0.1, min_samples=7, metric='euclidean',
1928
algorithm='auto', leaf_size=12, p=None)
20-
dbscan(self.iris)
29+
c = dbscan(self.iris)
30+
self.assertEqual(np.ndarray, type(c))
31+
self.assertEqual(len(self.iris), len(c))
2132

2233
def test_predict_table(self):
2334
pred = self.dbscan(self.iris)
2435
self.assertEqual(np.ndarray, type(pred))
36+
self.assertEqual(len(self.iris), len(pred))
2537

2638
def test_predict_numpy(self):
2739
model = self.dbscan.fit(self.iris.X)
40+
self.assertEqual(ClusteringModel, type(model))
2841
self.assertEqual(np.ndarray, type(model.labels))
42+
self.assertEqual(len(self.iris), len(model.labels))
43+
44+
def test_predict_sparse(self):
45+
self.iris.X = csc_matrix(self.iris.X[::20])
46+
c = self.dbscan(self.iris)
47+
self.assertEqual(np.ndarray, type(c))
48+
self.assertEqual(len(self.iris), len(c))
49+
50+
def test_model(self):
51+
c = self.dbscan.get_model(self.iris)
52+
self.assertEqual(ClusteringModel, type(c))
53+
self.assertEqual(len(self.iris), len(c.labels))
54+
55+
self.assertRaises(NotImplementedError, c, self.iris)
56+
57+
def test_model_np(self):
58+
"""
59+
Test with numpy array as an input in model.
60+
"""
61+
c = self.dbscan.get_model(self.iris)
62+
self.assertRaises(NotImplementedError, c, self.iris.X)
63+
64+
def test_model_sparse(self):
65+
"""
66+
Test with sparse array as an input in model.
67+
"""
68+
c = self.dbscan.get_model(self.iris)
69+
self.assertRaises(NotImplementedError, c, csr_matrix(self.iris.X))
70+
71+
def test_model_instance(self):
72+
"""
73+
Test with instance as an input in model.
74+
"""
75+
c = self.dbscan.get_model(self.iris)
76+
self.assertRaises(NotImplementedError, c, self.iris[0])
77+
78+
def test_model_list(self):
79+
"""
80+
Test with list as an input in model.
81+
"""
82+
c = self.dbscan.get_model(self.iris)
83+
self.assertRaises(NotImplementedError, c, self.iris.X.tolist())
84+
85+
def test_model_bad_datatype(self):
86+
"""
87+
Check model with data-type that is not supported.
88+
"""
89+
c = self.dbscan.get_model(self.iris)
90+
self.assertRaises(TypeError, c, 10)

Orange/tests/test_clustering_kmeans.py

Lines changed: 102 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,15 @@
22
# pylint: disable=missing-docstring
33

44
import unittest
5+
import warnings
56

67
import numpy as np
7-
from scipy.sparse import csc_matrix
8+
from scipy.sparse import csc_matrix, csr_matrix
89

910
import Orange
10-
from Orange.clustering.kmeans import KMeans
11+
from Orange.clustering.kmeans import KMeans, KMeansModel
12+
from Orange.data import Table, Domain, ContinuousVariable
13+
from Orange.data.table import DomainTransformationError
1114

1215

1316
class TestKMeans(unittest.TestCase):
@@ -18,25 +21,115 @@ def setUp(self):
1821
def test_kmeans(self):
1922
c = self.kmeans(self.iris)
2023
# First 20 iris belong to one cluster
24+
self.assertEqual(np.ndarray, type(c))
25+
self.assertEqual(len(self.iris), len(c))
2126
self.assertEqual(1, len(set(c[:20].ravel())))
2227

2328
def test_kmeans_parameters(self):
2429
kmeans = KMeans(n_clusters=10, max_iter=10, random_state=42, tol=0.001,
2530
init='random')
26-
kmeans(self.iris)
31+
c = kmeans(self.iris)
32+
self.assertEqual(np.ndarray, type(c))
33+
self.assertEqual(len(self.iris), len(c))
2734

2835
def test_predict_table(self):
29-
kmeans = KMeans()
30-
c = kmeans(self.iris)
36+
c = self.kmeans(self.iris)
3137
self.assertEqual(np.ndarray, type(c))
38+
self.assertEqual(len(self.iris), len(c))
3239

3340
def test_predict_numpy(self):
34-
kmeans = KMeans()
35-
c = kmeans.fit(self.iris.X)
41+
c = self.kmeans.fit(self.iris.X)
42+
self.assertEqual(KMeansModel, type(c))
3643
self.assertEqual(np.ndarray, type(c.labels))
44+
self.assertEqual(len(self.iris), len(c.labels))
3745

3846
def test_predict_sparse(self):
39-
kmeans = KMeans()
4047
self.iris.X = csc_matrix(self.iris.X[::20])
41-
c = kmeans(self.iris)
48+
c = self.kmeans(self.iris)
4249
self.assertEqual(np.ndarray, type(c))
50+
self.assertEqual(len(self.iris), len(c))
51+
52+
def test_model(self):
53+
c = self.kmeans.get_model(self.iris)
54+
self.assertEqual(KMeansModel, type(c))
55+
self.assertEqual(len(self.iris), len(c.labels))
56+
57+
c1 = c(self.iris)
58+
# the model's predictions must match the fitted labels since the data are the same
59+
np.testing.assert_array_almost_equal(c.labels, c1)
60+
61+
def test_model_np(self):
62+
"""
63+
Test with numpy array as an input in model.
64+
"""
65+
c = self.kmeans.get_model(self.iris)
66+
c1 = c(self.iris.X)
67+
# the model's predictions must match the fitted labels since the data are the same
68+
np.testing.assert_array_almost_equal(c.labels, c1)
69+
70+
def test_model_sparse(self):
71+
"""
72+
Test with sparse array as an input in model.
73+
"""
74+
c = self.kmeans.get_model(self.iris)
75+
c1 = c(csr_matrix(self.iris.X))
76+
# the model's predictions must match the fitted labels since the data are the same
77+
np.testing.assert_array_almost_equal(c.labels, c1)
78+
79+
def test_model_instance(self):
80+
"""
81+
Test with instance as an input in model.
82+
"""
83+
c = self.kmeans.get_model(self.iris)
84+
c1 = c(self.iris[0])
85+
# the prediction for the first instance must match its fitted label
86+
self.assertEqual(c1, c.labels[0])
87+
88+
def test_model_list(self):
89+
"""
90+
Test with list as an input in model.
91+
"""
92+
c = self.kmeans.get_model(self.iris)
93+
c1 = c(self.iris.X.tolist())
94+
# the model's predictions must match the fitted labels since the data are the same
95+
np.testing.assert_array_almost_equal(c.labels, c1)
96+
97+
# example with a list of only one data item
98+
c1 = c(self.iris.X.tolist()[0])
99+
# the prediction for the single item must match the first fitted label
100+
np.testing.assert_array_almost_equal(c.labels[0], c1)
101+
102+
def test_model_bad_datatype(self):
103+
"""
104+
Check model with data-type that is not supported.
105+
"""
106+
c = self.kmeans.get_model(self.iris)
107+
self.assertRaises(TypeError, c, 10)
108+
109+
def test_model_data_table_domain(self):
110+
"""
111+
Check model on data tables whose domain differs from the training domain.
112+
"""
113+
# ok domain
114+
data = Table(Domain(
115+
list(self.iris.domain.attributes) + [ContinuousVariable("a")]),
116+
np.concatenate((self.iris.X, np.ones((len(self.iris), 1))), axis=1))
117+
c = self.kmeans.get_model(self.iris)
118+
res = c(data)
119+
np.testing.assert_array_almost_equal(c.labels, res)
120+
121+
# totally different domain - should fail
122+
self.assertRaises(DomainTransformationError, c, Table("housing"))
123+
124+
def test_deprecated_silhouette(self):
125+
with warnings.catch_warnings(record=True) as w:
126+
KMeans(compute_silhouette_score=True)
127+
128+
assert len(w) == 1
129+
assert issubclass(w[-1].category, DeprecationWarning)
130+
131+
with warnings.catch_warnings(record=True) as w:
132+
KMeans(compute_silhouette_score=False)
133+
134+
assert len(w) == 1
135+
assert issubclass(w[-1].category, DeprecationWarning)
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
# Test methods with long descriptive names can omit docstrings
2+
# pylint: disable=missing-docstring
3+
4+
import unittest
5+
6+
import numpy as np
7+
import networkx
8+
from scipy.sparse import csc_matrix, csr_matrix
9+
10+
from Orange.clustering import ClusteringModel, table_to_knn_graph
11+
from Orange.data import Table
12+
from Orange.clustering.louvain import Louvain
13+
14+
15+
class TestLouvain(unittest.TestCase):
16+
def setUp(self):
17+
self.iris = Table('iris')
18+
self.louvain = Louvain()
19+
20+
def test_louvain(self):
21+
c = self.louvain(self.iris)
22+
# First 20 iris belong to one cluster
23+
self.assertEqual(np.ndarray, type(c))
24+
self.assertEqual(len(self.iris), len(c))
25+
self.assertEqual(1, len(set(c[:20].ravel())))
26+
27+
def test_louvain_parameters(self):
28+
louvain = Louvain(
29+
k_neighbors=3, resolution=1.2, random_state=42, metric="l2")
30+
c = louvain(self.iris)
31+
self.assertEqual(np.ndarray, type(c))
32+
self.assertEqual(len(self.iris), len(c))
33+
34+
def test_predict_table(self):
35+
c = self.louvain(self.iris)
36+
self.assertEqual(np.ndarray, type(c))
37+
self.assertEqual(len(self.iris), len(c))
38+
39+
def test_predict_numpy(self):
40+
c = self.louvain.fit(self.iris.X)
41+
self.assertEqual(ClusteringModel, type(c))
42+
self.assertEqual(np.ndarray, type(c.labels))
43+
self.assertEqual(len(self.iris), len(c.labels))
44+
45+
def test_predict_sparse(self):
46+
self.iris.X = csc_matrix(self.iris.X[::5])
47+
c = self.louvain(self.iris)
48+
self.assertEqual(np.ndarray, type(c))
49+
self.assertEqual(len(self.iris), len(c))
50+
51+
def test_model(self):
52+
c = self.louvain.get_model(self.iris)
53+
self.assertEqual(ClusteringModel, type(c))
54+
self.assertEqual(len(self.iris), len(c.labels))
55+
56+
self.assertRaises(NotImplementedError, c, self.iris)
57+
58+
def test_model_np(self):
59+
"""
60+
Test with numpy array as an input in model.
61+
"""
62+
c = self.louvain.get_model(self.iris)
63+
self.assertRaises(NotImplementedError, c, self.iris.X)
64+
65+
def test_model_sparse(self):
66+
"""
67+
Test with sparse array as an input in model.
68+
"""
69+
c = self.louvain.get_model(self.iris)
70+
self.assertRaises(NotImplementedError, c, csr_matrix(self.iris.X))
71+
72+
def test_model_instance(self):
73+
"""
74+
Test with instance as an input in model.
75+
"""
76+
c = self.louvain.get_model(self.iris)
77+
self.assertRaises(NotImplementedError, c, self.iris[0])
78+
79+
def test_model_list(self):
80+
"""
81+
Test with list as an input in model.
82+
"""
83+
c = self.louvain.get_model(self.iris)
84+
self.assertRaises(NotImplementedError, c, self.iris.X.tolist())
85+
86+
def test_graph(self):
87+
"""
88+
Louvain accepts graphs too.
89+
:return:
90+
"""
91+
graph = table_to_knn_graph(self.iris.X, 30, "l2")
92+
self.assertIsNotNone(graph)
93+
self.assertEqual(networkx.Graph, type(graph), 1)
94+
95+
# basic clustering - get clusters
96+
c = self.louvain(graph)
97+
# First 20 iris belong to one cluster
98+
self.assertEqual(np.ndarray, type(c))
99+
self.assertEqual(len(self.iris), len(c))
100+
self.assertEqual(1, len(set(c[:20].ravel())))
101+
102+
# clustering - get model
103+
c = self.louvain.get_model(graph)
104+
# The model must be a ClusteringModel holding one label per data instance
105+
self.assertEqual(ClusteringModel, type(c))
106+
self.assertEqual(len(self.iris), len(c.labels))
107+
108+
def test_model_bad_datatype(self):
109+
"""
110+
Check model with data-type that is not supported.
111+
"""
112+
c = self.louvain.get_model(self.iris)
113+
self.assertRaises(TypeError, c, 10)

Orange/tests/test_louvain.py

Lines changed: 0 additions & 18 deletions
This file was deleted.

0 commit comments

Comments
 (0)