Skip to content

Commit 2d6b629

Browse files
committed
Clustering: Additional tests for clustering methods
1 parent 27634c5 commit 2d6b629

File tree

4 files changed

+307
-29
lines changed

4 files changed

+307
-29
lines changed

Orange/tests/test_clustering_dbscan.py

Lines changed: 69 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@
44
import unittest
55

66
import numpy as np
7+
from scipy.sparse import csc_matrix, csr_matrix
78

9+
from Orange.clustering.clustering import ClusteringModel
810
from Orange.data import Table
911
from Orange.clustering.dbscan import DBSCAN
1012

@@ -14,15 +16,81 @@ def setUp(self):
1416
self.iris = Table('iris')
1517
self.dbscan = DBSCAN()
1618

19+
def test_dbscan(self):
20+
c = self.dbscan(self.iris)
21+
# First 20 iris belong to one cluster
22+
self.assertEqual(np.ndarray, type(c))
23+
self.assertEqual(len(self.iris), len(c))
24+
self.assertEqual(1, len(set(c[:20].ravel())))
25+
1726
def test_dbscan_parameters(self):
1827
dbscan = DBSCAN(eps=0.1, min_samples=7, metric='euclidean',
1928
algorithm='auto', leaf_size=12, p=None)
20-
dbscan(self.iris)
29+
c = dbscan(self.iris)
30+
self.assertEqual(np.ndarray, type(c))
31+
self.assertEqual(len(self.iris), len(c))
2132

2233
def test_predict_table(self):
2334
pred = self.dbscan(self.iris)
2435
self.assertEqual(np.ndarray, type(pred))
36+
self.assertEqual(len(self.iris), len(pred))
2537

2638
def test_predict_numpy(self):
2739
model = self.dbscan.fit(self.iris.X)
40+
self.assertEqual(ClusteringModel, type(model))
2841
self.assertEqual(np.ndarray, type(model.labels))
42+
self.assertEqual(len(self.iris), len(model.labels))
43+
44+
def test_predict_sparse_csc(self):
45+
self.iris.X = csc_matrix(self.iris.X[::20])
46+
c = self.dbscan(self.iris)
47+
self.assertEqual(np.ndarray, type(c))
48+
self.assertEqual(len(self.iris), len(c))
49+
50+
def test_predict_spares_csr(self):
51+
self.iris.X = csr_matrix(self.iris.X[::20])
52+
c = self.dbscan(self.iris)
53+
self.assertEqual(np.ndarray, type(c))
54+
self.assertEqual(len(self.iris), len(c))
55+
56+
def test_model(self):
57+
c = self.dbscan.get_model(self.iris)
58+
self.assertEqual(ClusteringModel, type(c))
59+
self.assertEqual(len(self.iris), len(c.labels))
60+
61+
self.assertRaises(NotImplementedError, c, self.iris)
62+
63+
def test_model_np(self):
64+
"""
65+
Test with numpy array as an input in model.
66+
"""
67+
c = self.dbscan.get_model(self.iris)
68+
self.assertRaises(NotImplementedError, c, self.iris.X)
69+
70+
def test_model_sparse(self):
71+
"""
72+
Test with sparse array as an input in model.
73+
"""
74+
c = self.dbscan.get_model(self.iris)
75+
self.assertRaises(NotImplementedError, c, csr_matrix(self.iris.X))
76+
77+
def test_model_instance(self):
78+
"""
79+
Test with instance as an input in model.
80+
"""
81+
c = self.dbscan.get_model(self.iris)
82+
self.assertRaises(NotImplementedError, c, self.iris[0])
83+
84+
def test_model_list(self):
85+
"""
86+
Test with list as an input in model.
87+
"""
88+
c = self.dbscan.get_model(self.iris)
89+
self.assertRaises(NotImplementedError, c, self.iris.X.tolist())
90+
91+
def test_model_bad_datatype(self):
92+
"""
93+
Check model with data-type that is not supported.
94+
"""
95+
c = self.dbscan.get_model(self.iris)
96+
self.assertRaises(TypeError, c, 10)

Orange/tests/test_clustering_kmeans.py

Lines changed: 118 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,15 @@
22
# pylint: disable=missing-docstring
33

44
import unittest
5+
import warnings
56

67
import numpy as np
7-
from scipy.sparse import csc_matrix
8+
from scipy.sparse import csc_matrix, csr_matrix
89

910
import Orange
10-
from Orange.clustering.kmeans import KMeans
11+
from Orange.clustering.kmeans import KMeans, KMeansModel
12+
from Orange.data import Table, Domain, ContinuousVariable
13+
from Orange.data.table import DomainTransformationError
1114

1215

1316
class TestKMeans(unittest.TestCase):
@@ -18,25 +21,130 @@ def setUp(self):
1821
def test_kmeans(self):
1922
c = self.kmeans(self.iris)
2023
# First 20 iris belong to one cluster
24+
self.assertEqual(np.ndarray, type(c))
25+
self.assertEqual(len(self.iris), len(c))
2126
self.assertEqual(1, len(set(c[:20].ravel())))
2227

2328
def test_kmeans_parameters(self):
2429
kmeans = KMeans(n_clusters=10, max_iter=10, random_state=42, tol=0.001,
2530
init='random')
26-
kmeans(self.iris)
31+
c = kmeans(self.iris)
32+
self.assertEqual(np.ndarray, type(c))
33+
self.assertEqual(len(self.iris), len(c))
2734

2835
def test_predict_table(self):
29-
kmeans = KMeans()
30-
c = kmeans(self.iris)
36+
c = self.kmeans(self.iris)
3137
self.assertEqual(np.ndarray, type(c))
38+
self.assertEqual(len(self.iris), len(c))
3239

3340
def test_predict_numpy(self):
34-
kmeans = KMeans()
35-
c = kmeans.fit(self.iris.X)
41+
c = self.kmeans.fit(self.iris.X)
42+
self.assertEqual(KMeansModel, type(c))
3643
self.assertEqual(np.ndarray, type(c.labels))
44+
self.assertEqual(len(self.iris), len(c.labels))
3745

38-
def test_predict_sparse(self):
39-
kmeans = KMeans()
46+
def test_predict_sparse_csc(self):
4047
self.iris.X = csc_matrix(self.iris.X[::20])
41-
c = kmeans(self.iris)
48+
c = self.kmeans(self.iris)
4249
self.assertEqual(np.ndarray, type(c))
50+
self.assertEqual(len(self.iris), len(c))
51+
52+
def test_predict_spares_csr(self):
53+
self.iris.X = csr_matrix(self.iris.X[::20])
54+
c = self.kmeans(self.iris)
55+
self.assertEqual(np.ndarray, type(c))
56+
self.assertEqual(len(self.iris), len(c))
57+
58+
def test_model(self):
59+
c = self.kmeans.get_model(self.iris)
60+
self.assertEqual(KMeansModel, type(c))
61+
self.assertEqual(len(self.iris), len(c.labels))
62+
63+
c1 = c(self.iris)
64+
# prediction of the model must be same since data are same
65+
np.testing.assert_array_almost_equal(c.labels, c1)
66+
67+
def test_model_np(self):
68+
"""
69+
Test with numpy array as an input in model.
70+
"""
71+
c = self.kmeans.get_model(self.iris)
72+
c1 = c(self.iris.X)
73+
# prediction of the model must be same since data are same
74+
np.testing.assert_array_almost_equal(c.labels, c1)
75+
76+
def test_model_sparse_csc(self):
77+
"""
78+
Test with sparse CSC matrix as an input in model.
79+
"""
80+
c = self.kmeans.get_model(self.iris)
81+
c1 = c(csc_matrix(self.iris.X))
82+
# prediction of the model must be same since data are same
83+
np.testing.assert_array_almost_equal(c.labels, c1)
84+
85+
def test_model_sparse_csr(self):
86+
"""
87+
Test with sparse CSR matrix as an input in model.
88+
"""
89+
c = self.kmeans.get_model(self.iris)
90+
c1 = c(csr_matrix(self.iris.X))
91+
# prediction of the model must be same since data are same
92+
np.testing.assert_array_almost_equal(c.labels, c1)
93+
94+
def test_model_instance(self):
95+
"""
96+
Test with instance as an input in model.
97+
"""
98+
c = self.kmeans.get_model(self.iris)
99+
c1 = c(self.iris[0])
100+
# prediction of the model must be same since data are same
101+
self.assertEqual(c1, c.labels[0])
102+
103+
def test_model_list(self):
104+
"""
105+
Test with list as an input in model.
106+
"""
107+
c = self.kmeans.get_model(self.iris)
108+
c1 = c(self.iris.X.tolist())
109+
# prediction of the model must be same since data are same
110+
np.testing.assert_array_almost_equal(c.labels, c1)
111+
112+
# example with a list of only one data item
113+
c1 = c(self.iris.X.tolist()[0])
114+
# prediction of the model must be same since data are same
115+
np.testing.assert_array_almost_equal(c.labels[0], c1)
116+
117+
def test_model_bad_datatype(self):
118+
"""
119+
Check model with data-type that is not supported.
120+
"""
121+
c = self.kmeans.get_model(self.iris)
122+
self.assertRaises(TypeError, c, 10)
123+
124+
def test_model_data_table_domain(self):
125+
"""
126+
Check model with data tables whose domain differs from the training domain.
127+
"""
128+
# ok domain
129+
data = Table(Domain(
130+
list(self.iris.domain.attributes) + [ContinuousVariable("a")]),
131+
np.concatenate((self.iris.X, np.ones((len(self.iris), 1))), axis=1))
132+
c = self.kmeans.get_model(self.iris)
133+
res = c(data)
134+
np.testing.assert_array_almost_equal(c.labels, res)
135+
136+
# totally different domain - should fail
137+
self.assertRaises(DomainTransformationError, c, Table("housing"))
138+
139+
def test_deprecated_silhouette(self):
140+
with warnings.catch_warnings(record=True) as w:
141+
KMeans(compute_silhouette_score=True)
142+
143+
assert len(w) == 1
144+
assert issubclass(w[-1].category, DeprecationWarning)
145+
146+
with warnings.catch_warnings(record=True) as w:
147+
KMeans(compute_silhouette_score=False)
148+
149+
assert len(w) == 1
150+
assert issubclass(w[-1].category, DeprecationWarning)
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
# Test methods with long descriptive names can omit docstrings
2+
# pylint: disable=missing-docstring
3+
4+
import unittest
5+
6+
import numpy as np
7+
import networkx
8+
from scipy.sparse import csc_matrix, csr_matrix
9+
10+
from Orange.clustering.clustering import ClusteringModel
11+
from Orange.clustering.louvain import matrix_to_knn_graph
12+
from Orange.data import Table
13+
from Orange.clustering.louvain import Louvain
14+
15+
16+
class TestLouvain(unittest.TestCase):
17+
def setUp(self):
18+
self.iris = Table('iris')
19+
self.louvain = Louvain()
20+
21+
def test_louvain(self):
22+
c = self.louvain(self.iris)
23+
# First 20 iris belong to one cluster
24+
self.assertEqual(np.ndarray, type(c))
25+
self.assertEqual(len(self.iris), len(c))
26+
self.assertEqual(1, len(set(c[:20].ravel())))
27+
28+
def test_louvain_parameters(self):
29+
louvain = Louvain(
30+
k_neighbors=3, resolution=1.2, random_state=42, metric="l2")
31+
c = louvain(self.iris)
32+
self.assertEqual(np.ndarray, type(c))
33+
self.assertEqual(len(self.iris), len(c))
34+
35+
def test_predict_table(self):
36+
c = self.louvain(self.iris)
37+
self.assertEqual(np.ndarray, type(c))
38+
self.assertEqual(len(self.iris), len(c))
39+
40+
def test_predict_numpy(self):
41+
c = self.louvain.fit(self.iris.X)
42+
self.assertEqual(ClusteringModel, type(c))
43+
self.assertEqual(np.ndarray, type(c.labels))
44+
self.assertEqual(len(self.iris), len(c.labels))
45+
46+
def test_predict_sparse_csc(self):
47+
self.iris.X = csc_matrix(self.iris.X[::5])
48+
c = self.louvain(self.iris)
49+
self.assertEqual(np.ndarray, type(c))
50+
self.assertEqual(len(self.iris), len(c))
51+
52+
def test_predict_spares_csr(self):
53+
self.iris.X = csr_matrix(self.iris.X[::5])
54+
c = self.louvain(self.iris)
55+
self.assertEqual(np.ndarray, type(c))
56+
self.assertEqual(len(self.iris), len(c))
57+
58+
def test_model(self):
59+
c = self.louvain.get_model(self.iris)
60+
self.assertEqual(ClusteringModel, type(c))
61+
self.assertEqual(len(self.iris), len(c.labels))
62+
63+
self.assertRaises(NotImplementedError, c, self.iris)
64+
65+
def test_model_np(self):
66+
"""
67+
Test with numpy array as an input in model.
68+
"""
69+
c = self.louvain.get_model(self.iris)
70+
self.assertRaises(NotImplementedError, c, self.iris.X)
71+
72+
def test_model_sparse(self):
73+
"""
74+
Test with sparse array as an input in model.
75+
"""
76+
c = self.louvain.get_model(self.iris)
77+
self.assertRaises(NotImplementedError, c, csr_matrix(self.iris.X))
78+
79+
def test_model_instance(self):
80+
"""
81+
Test with instance as an input in model.
82+
"""
83+
c = self.louvain.get_model(self.iris)
84+
self.assertRaises(NotImplementedError, c, self.iris[0])
85+
86+
def test_model_list(self):
87+
"""
88+
Test with list as an input in model.
89+
"""
90+
c = self.louvain.get_model(self.iris)
91+
self.assertRaises(NotImplementedError, c, self.iris.X.tolist())
92+
93+
def test_graph(self):
94+
"""
95+
Louvain accepts graphs too.
96+
:return:
97+
"""
98+
graph = matrix_to_knn_graph(self.iris.X, 30, "l2")
99+
self.assertIsNotNone(graph)
100+
self.assertEqual(networkx.Graph, type(graph), 1)
101+
102+
# basic clustering - get clusters
103+
c = self.louvain(graph)
104+
# First 20 iris belong to one cluster
105+
self.assertEqual(np.ndarray, type(c))
106+
self.assertEqual(len(self.iris), len(c))
107+
self.assertEqual(1, len(set(c[:20].ravel())))
108+
109+
# clustering - get model
110+
c = self.louvain.get_model(graph)
111+
# The model has the expected type and one label per data instance
112+
self.assertEqual(ClusteringModel, type(c))
113+
self.assertEqual(len(self.iris), len(c.labels))
114+
115+
def test_model_bad_datatype(self):
116+
"""
117+
Check model with data-type that is not supported.
118+
"""
119+
c = self.louvain.get_model(self.iris)
120+
self.assertRaises(TypeError, c, 10)

Orange/tests/test_louvain.py

Lines changed: 0 additions & 18 deletions
This file was deleted.

0 commit comments

Comments
 (0)