11"""
22Tests for Robust Single Linkage clustering algorithm
33"""
4- #import pickle
4+ # import pickle
55from nose .tools import assert_less
66import numpy as np
77from scipy .spatial import distance
88from scipy import sparse
99from sklearn .utils .estimator_checks import check_estimator
10- from sklearn .utils .testing import assert_equal
11- from sklearn .utils .testing import assert_array_equal
12- from sklearn .utils .testing import assert_raises
13- from sklearn .utils .testing import assert_in
14- from sklearn .utils .testing import assert_not_in
15- from sklearn .utils .testing import assert_no_warnings
16- from sklearn .utils .testing import if_matplotlib
17- from hdbscan import RobustSingleLinkage
18- from hdbscan import robust_single_linkage
10+ from sklearn .utils .testing import (assert_equal ,
11+ assert_array_equal ,
12+ assert_raises ,
13+ assert_in ,
14+ assert_not_in ,
15+ assert_no_warnings ,
16+ if_matplotlib )
17+ from hdbscan import RobustSingleLinkage , robust_single_linkage
1918# from sklearn.cluster.tests.common import generate_clustered_data
2019
2120from sklearn import datasets
3231X = StandardScaler ().fit_transform (X )
3332# X = generate_clustered_data(n_clusters=n_clusters, n_samples_per_cluster=50)
3433
34+
def test_rsl_distance_matrix():
    """Cluster from a precomputed distance matrix through both entry points.

    The pairwise distance matrix of the module-level ``X`` is divided by its
    maximum and then handed to ``robust_single_linkage`` and to the
    ``RobustSingleLinkage`` estimator with ``metric='precomputed'``.  Each
    result is asserted to contain two clusters once the noise label ``-1``
    is discounted.
    """
    condensed = distance.pdist(X)
    dist_matrix = distance.squareform(condensed)
    dist_matrix /= np.max(dist_matrix)

    # Functional interface, cut at 0.4; the returned tree is not inspected.
    labels, _ = robust_single_linkage(dist_matrix, 0.4, metric='precomputed')
    has_noise = int(-1 in labels)  # ignore noise
    n_clusters_1 = len(set(labels)) - has_noise
    assert_equal(n_clusters_1, 2)

    # Estimator interface on the same matrix.
    estimator = RobustSingleLinkage(metric="precomputed")
    labels = estimator.fit(dist_matrix).labels_
    has_noise = int(-1 in labels)
    n_clusters_2 = len(set(labels)) - has_noise
    assert_equal(n_clusters_2, 2)
4747
48+
4849def test_rsl_feature_vector ():
4950 labels , tree = robust_single_linkage (X , 0.4 )
5051 n_clusters_1 = len (set (labels )) - int (- 1 in labels )
@@ -54,6 +55,7 @@ def test_rsl_feature_vector():
5455 n_clusters_2 = len (set (labels )) - int (- 1 in labels )
5556 assert_equal (n_clusters_2 , n_clusters )
5657
58+
5759def test_rsl_callable_metric ():
5860 # metric is the function reference, not the string key.
5961 metric = distance .euclidean
@@ -66,19 +68,23 @@ def test_rsl_callable_metric():
6668 n_clusters_2 = len (set (labels )) - int (- 1 in labels )
6769 assert_equal (n_clusters_2 , n_clusters )
6870
71+
def test_rsl_input_lists():
    """Fitting on a plain nested Python list must complete without raising."""
    points = [[1., 2.], [3., 4.]]
    estimator = RobustSingleLinkage()
    estimator.fit(points)  # must not raise exception
7275
76+
def test_rsl_boruvka_balltree():
    """Check the cluster count with ``algorithm='boruvka_balltree'``.

    Both the ``robust_single_linkage`` function and the
    ``RobustSingleLinkage`` estimator are run on the module-level ``X`` with
    a cut of 0.45; the number of non-noise labels must equal the
    module-level ``n_clusters``.
    """
    algorithm = 'boruvka_balltree'

    # Functional interface; the returned tree is not inspected.
    labels, _ = robust_single_linkage(X, 0.45, algorithm=algorithm)
    has_noise = int(-1 in labels)
    n_clusters_1 = len(set(labels)) - has_noise
    assert_equal(n_clusters_1, n_clusters)

    # Estimator interface with the same settings.
    estimator = RobustSingleLinkage(cut=0.45, algorithm=algorithm)
    labels = estimator.fit(X).labels_
    has_noise = int(-1 in labels)
    n_clusters_2 = len(set(labels)) - has_noise
    assert_equal(n_clusters_2, n_clusters)
8186
87+
8288def test_rsl_prims_balltree ():
8389 labels , tree = robust_single_linkage (X , 0.4 , algorithm = 'prims_balltree' )
8490 n_clusters_1 = len (set (labels )) - int (- 1 in labels )
@@ -88,6 +94,7 @@ def test_rsl_prims_balltree():
8894 n_clusters_2 = len (set (labels )) - int (- 1 in labels )
8995 assert_equal (n_clusters_2 , n_clusters )
9096
97+
9198def test_rsl_prims_kdtree ():
9299 labels , tree = robust_single_linkage (X , 0.4 , algorithm = 'prims_kdtree' )
93100 n_clusters_1 = len (set (labels )) - int (- 1 in labels )
@@ -97,16 +104,19 @@ def test_rsl_prims_kdtree():
97104 n_clusters_2 = len (set (labels )) - int (- 1 in labels )
98105 assert_equal (n_clusters_2 , n_clusters )
99106
107+
def test_rsl_unavailable_hierarchy():
    """Reading ``cluster_hierarchy_`` before fitting must warn and give None.

    An unfitted ``RobustSingleLinkage`` is created and its
    ``cluster_hierarchy_`` attribute is read while warnings are recorded.
    The test asserts that at least one warning was captured and that the
    attribute value is ``None``.
    """
    clusterer = RobustSingleLinkage()
    with warnings.catch_warnings(record=True) as w:
        # Reset the filter so the warning is always recorded.  Under the
        # default filters a warning already issued from the same location
        # earlier in the process may be suppressed, which would make the
        # length check below fail spuriously.
        warnings.simplefilter("always")
        tree = clusterer.cluster_hierarchy_
        assert len(w) > 0
        assert tree is None
114+
106115
def test_rsl_hierarchy():
    """After fitting on ``X`` the estimator exposes a non-None hierarchy."""
    fitted = RobustSingleLinkage().fit(X)
    hierarchy = fitted.cluster_hierarchy_
    assert hierarchy is not None
119+
110120
111121def test_rsl_high_dimensional ():
112122 H , y = make_blobs (n_samples = 50 , random_state = 0 , n_features = 64 )
@@ -116,10 +126,13 @@ def test_rsl_high_dimensional():
116126 n_clusters_1 = len (set (labels )) - int (- 1 in labels )
117127 assert_equal (n_clusters_1 , n_clusters )
118128
119- labels = RobustSingleLinkage (cut = 5.5 , algorithm = 'best' , metric = 'seuclidean' , V = np .ones (H .shape [1 ])).fit (H ).labels_
129+ labels = RobustSingleLinkage (cut = 5.5 , algorithm = 'best' ,
130+ metric = 'seuclidean' ,
131+ V = np .ones (H .shape [1 ])).fit (H ).labels_
120132 n_clusters_2 = len (set (labels )) - int (- 1 in labels )
121133 assert_equal (n_clusters_2 , n_clusters )
122134
135+
123136def test_rsl_badargs ():
124137 assert_raises (ValueError ,
125138 robust_single_linkage ,
@@ -150,7 +163,8 @@ def test_rsl_badargs():
150163 X , 0.4 , metric = 'minkowski' , p = - 1 , algorithm = 'prims_balltree' )
151164 assert_raises (ValueError ,
152165 robust_single_linkage ,
153- X , 0.4 , metric = 'minkowski' , p = - 1 , algorithm = 'boruvka_balltree' )
166+ X , 0.4 , metric = 'minkowski' , p = - 1 ,
167+ algorithm = 'boruvka_balltree' )
154168 assert_raises (ValueError ,
155169 robust_single_linkage ,
156170 X , 0.4 , metric = 'precomputed' , algorithm = 'boruvka_kdtree' )
@@ -162,7 +176,7 @@ def test_rsl_badargs():
162176 X , 0.4 , metric = 'precomputed' , algorithm = 'prims_balltree' )
163177 assert_raises (ValueError ,
164178 robust_single_linkage ,
165- X , 0.4 , metric = 'precomputed' ,algorithm = 'boruvka_balltree' )
179+ X , 0.4 , metric = 'precomputed' , algorithm = 'boruvka_balltree' )
166180 assert_raises (ValueError ,
167181 robust_single_linkage ,
168182 X , 0.4 , alpha = - 1 )
@@ -182,6 +196,7 @@ def test_rsl_badargs():
182196 robust_single_linkage ,
183197 X , 0.4 , gamma = 0 )
184198
199+
def test_rsl_is_sklearn_estimator():
    """Run scikit-learn's ``check_estimator`` on ``RobustSingleLinkage``."""
    check_estimator(RobustSingleLinkage)
0 commit comments