3030
3131from sklearn import datasets
3232
33+ import warnings
34+
3335n_clusters = 3
3436# X = generate_clustered_data(n_clusters=n_clusters, n_samples_per_cluster=50)
3537X , y = make_blobs (n_samples = 50 , random_state = 0 )
@@ -110,7 +112,7 @@ def test_hdbscan_sparse_distance_matrix():
110112 n_clusters_1 = len (set (labels )) - int (- 1 in labels ) # ignore noise
111113 assert_equal (n_clusters_1 , n_clusters )
112114
113- labels = HDBSCAN (metric = "precomputed" ).fit (D ).labels_
115+ labels = HDBSCAN (metric = "precomputed" , gen_min_span_tree = True ).fit (D ).labels_
114116 n_clusters_2 = len (set (labels )) - int (- 1 in labels )
115117 assert_equal (n_clusters_2 , n_clusters )
116118
@@ -124,6 +126,89 @@ def test_hdbscan_feature_vector():
124126 n_clusters_2 = len (set (labels )) - int (- 1 in labels )
125127 assert_equal (n_clusters_2 , n_clusters )
126128
def test_hdbscan_prims_kdtree():
    """Exercise ``algorithm='prims_kdtree'`` through both public entry points.

    The functional ``hdbscan`` call and the ``HDBSCAN`` estimator are each run
    on the module-level data ``X`` and must both yield ``n_clusters`` clusters
    (the noise label ``-1`` is not counted as a cluster).  Finally, combining
    this algorithm with the Russell-Rao metric must raise ``ValueError``.
    """
    # The six-way unpacking is kept on purpose: it fails loudly if the
    # functional API ever returns a tuple of a different length.  Only the
    # labels are inspected; the remaining values are throwaways.
    labels, _p, _persist, _ctree, _ltree, _mtree = hdbscan(
        X, algorithm='prims_kdtree')
    n_clusters_1 = len(set(labels)) - int(-1 in labels)  # ignore noise
    assert_equal(n_clusters_1, n_clusters)

    labels = HDBSCAN(algorithm='prims_kdtree',
                     gen_min_span_tree=True).fit(X).labels_
    n_clusters_2 = len(set(labels)) - int(-1 in labels)  # ignore noise
    assert_equal(n_clusters_2, n_clusters)

    # 'russellrao' is the spelling scikit-learn and SciPy use for this metric.
    # The previous 'russelrao' was not a metric name at all, so the check only
    # covered rejection of an unknown string.
    # NOTE(review): presumably the intent is "real metric, unsupported by the
    # KD-tree" -- confirm against the algorithm dispatch in hdbscan().
    assert_raises(ValueError,
                  hdbscan,
                  X,
                  algorithm='prims_kdtree',
                  metric='russellrao')
143+
def test_hdbscan_prims_balltree():
    """Exercise ``algorithm='prims_balltree'`` via function and estimator.

    Both code paths are run on the module-level data ``X`` and must report
    ``n_clusters`` clusters once the noise label ``-1`` is set aside.  The
    algorithm combined with ``metric='cosine'`` must raise ``ValueError``.
    """
    algo = 'prims_balltree'

    # Functional API: a six-element result whose first entry is the labels.
    result = hdbscan(X, algorithm=algo)
    func_labels, _p, _persist, _ctree, _ltree, _mtree = result
    assert_equal(len(set(func_labels) - {-1}), n_clusters)

    # Estimator API.
    estimator = HDBSCAN(algorithm=algo, gen_min_span_tree=True)
    est_labels = estimator.fit(X).labels_
    assert_equal(len(set(est_labels) - {-1}), n_clusters)

    assert_raises(ValueError, hdbscan, X, algorithm=algo, metric='cosine')
158+
def test_hdbscan_boruvka_kdtree():
    """Exercise ``algorithm='boruvka_kdtree'`` through both public entry points.

    The functional ``hdbscan`` call and the ``HDBSCAN`` estimator are each run
    on the module-level data ``X`` and must both yield ``n_clusters`` clusters
    (the noise label ``-1`` is not counted as a cluster).  Finally, combining
    this algorithm with the Russell-Rao metric must raise ``ValueError``.
    """
    # The six-way unpacking is kept on purpose: it fails loudly if the
    # functional API ever returns a tuple of a different length.  Only the
    # labels are inspected; the remaining values are throwaways.
    labels, _p, _persist, _ctree, _ltree, _mtree = hdbscan(
        X, algorithm='boruvka_kdtree')
    n_clusters_1 = len(set(labels)) - int(-1 in labels)  # ignore noise
    assert_equal(n_clusters_1, n_clusters)

    labels = HDBSCAN(algorithm='boruvka_kdtree',
                     gen_min_span_tree=True).fit(X).labels_
    n_clusters_2 = len(set(labels)) - int(-1 in labels)  # ignore noise
    assert_equal(n_clusters_2, n_clusters)

    # 'russellrao' is the spelling scikit-learn and SciPy use for this metric.
    # The previous 'russelrao' was not a metric name at all, so the check only
    # covered rejection of an unknown string.
    # NOTE(review): presumably the intent is "real metric, unsupported by the
    # KD-tree" -- confirm against the algorithm dispatch in hdbscan().
    assert_raises(ValueError,
                  hdbscan,
                  X,
                  algorithm='boruvka_kdtree',
                  metric='russellrao')
173+
def test_hdbscan_boruvka_balltree():
    """Exercise ``algorithm='boruvka_balltree'`` via function and estimator.

    Both code paths are run on the module-level data ``X`` and must report
    ``n_clusters`` clusters once the noise label ``-1`` is set aside.  The
    algorithm combined with ``metric='cosine'`` must raise ``ValueError``.
    """
    algo = 'boruvka_balltree'

    # Functional API: a six-element result whose first entry is the labels.
    result = hdbscan(X, algorithm=algo)
    func_labels, _p, _persist, _ctree, _ltree, _mtree = result
    assert_equal(len(set(func_labels) - {-1}), n_clusters)

    # Estimator API.
    estimator = HDBSCAN(algorithm=algo, gen_min_span_tree=True)
    est_labels = estimator.fit(X).labels_
    assert_equal(len(set(est_labels) - {-1}), n_clusters)

    assert_raises(ValueError, hdbscan, X, algorithm=algo, metric='cosine')
188+
189+
def test_hdbscan_high_dimensional():
    """Cluster 64-feature blobs with the default and the 'best' algorithm.

    Fifty samples are drawn with ``make_blobs`` (64 features, fixed seed) and
    passed through ``StandardScaler``.  The functional ``hdbscan`` call with
    default settings and an ``HDBSCAN`` estimator using ``algorithm='best'``
    with the ``'seuclidean'`` metric must each find ``n_clusters`` clusters,
    not counting the noise label ``-1``.
    """
    blobs, _targets = make_blobs(n_samples=50, random_state=0, n_features=64)
    scaled = StandardScaler().fit_transform(blobs)

    # Functional API with default parameters.
    func_labels, _p, _persist, _ctree, _ltree, _mtree = hdbscan(scaled)
    assert_equal(len(set(func_labels) - {-1}), n_clusters)

    # Estimator API; V holds one entry per feature, all equal to one.
    per_feature_ones = np.ones(scaled.shape[1])
    estimator = HDBSCAN(algorithm='best', metric='seuclidean',
                        V=per_feature_ones)
    est_labels = estimator.fit(scaled).labels_
    assert_equal(len(set(est_labels) - {-1}), n_clusters)
201+
def test_hdbscan_best_balltree_metric():
    """Run the default algorithm with the 'seuclidean' metric on ``X``.

    Both the functional ``hdbscan`` call and the ``HDBSCAN`` estimator receive
    ``metric='seuclidean'`` with an all-ones ``V`` vector (one entry per
    column of ``X``) and must each find ``n_clusters`` clusters, not counting
    the noise label ``-1``.
    """
    n_features = X.shape[1]

    # Functional API.
    result = hdbscan(X, metric='seuclidean', V=np.ones(n_features))
    func_labels, _p, _persist, _ctree, _ltree, _mtree = result
    assert_equal(len(set(func_labels) - {-1}), n_clusters)

    # Estimator API, given its own freshly built V vector.
    estimator = HDBSCAN(metric='seuclidean', V=np.ones(n_features))
    est_labels = estimator.fit(X).labels_
    assert_equal(len(set(est_labels) - {-1}), n_clusters)
210+
211+
127212def test_hdbscan_no_clusters ():
128213 labels , p , persist , ctree , ltree , mtree = hdbscan (X , min_cluster_size = len (X )+ 1 )
129214 n_clusters_1 = len (set (labels )) - int (- 1 in labels )
@@ -212,6 +297,42 @@ def test_tree_networkx_output_formats():
212297 if_networkx (clusterer .single_linkage_tree_ .to_networkx )()
213298 if_networkx (clusterer .minimum_spanning_tree_ .to_networkx )()
214299
def test_hdbscan_outliers():
    """A fitted estimator must expose a non-None ``outlier_scores_``."""
    fitted = HDBSCAN(gen_min_span_tree=True).fit(X)
    outlier_scores = fitted.outlier_scores_
    assert outlier_scores is not None
304+
def test_hdbscan_unavailable_attributes():
    """Result attributes of an unfitted estimator must warn and be ``None``.

    An ``HDBSCAN`` instance is constructed but never fitted.  Reading each of
    ``condensed_tree_``, ``single_linkage_tree_``, ``outlier_scores_`` and
    ``minimum_spanning_tree_`` must emit at least one warning and evaluate to
    ``None``.
    """
    clusterer = HDBSCAN(gen_min_span_tree=False)
    attribute_names = (
        'condensed_tree_',
        'single_linkage_tree_',
        'outlier_scores_',
        'minimum_spanning_tree_',
    )

    for name in attribute_names:
        with warnings.catch_warnings(record=True) as caught:
            # Without an explicit "always" filter, whether the warning is
            # recorded depends on ambient filters (e.g. a runner configured
            # to ignore it), which would make the length check fail spuriously.
            warnings.simplefilter("always")
            value = getattr(clusterer, name)
        assert len(caught) > 0, "no warning raised for %s" % name
        assert value is None, "%s should be None before fitting" % name
323+
def test_hdbscan_min_span_tree_availability():
    """``minimum_spanning_tree_`` must be ``None`` unless it was requested.

    Checked for two fits that both leave ``gen_min_span_tree`` at its
    default: one on the raw feature matrix ``X`` and one on a precomputed,
    max-normalised pairwise distance matrix.
    """
    clusterer = HDBSCAN().fit(X)
    tree = clusterer.minimum_spanning_tree_
    assert tree is None

    # Dense pairwise distances of X, scaled in place so the maximum is 1.
    D = distance.squareform(distance.pdist(X))
    D /= np.max(D)

    # Rebind `clusterer` to the estimator fitted on D.  Previously the fitted
    # estimator was discarded and the assertion below re-checked the first
    # clusterer, so the precomputed path was never actually inspected.
    clusterer = HDBSCAN(metric='precomputed').fit(D)
    tree = clusterer.minimum_spanning_tree_
    assert tree is None
333+
334+
335+
215336def test_hdbscan_badargs ():
216337 assert_raises (ValueError ,
217338 hdbscan ,
@@ -246,6 +367,18 @@ def test_hdbscan_badargs():
246367 assert_raises (ValueError ,
247368 hdbscan ,
248369 X , metric = 'minkowski' , p = - 1 , algorithm = 'boruvka_balltree' )
370+ assert_raises (ValueError ,
371+ hdbscan ,
372+ X , metric = 'precomputed' , algorithm = 'boruvka_kdtree' )
373+ assert_raises (ValueError ,
374+ hdbscan ,
375+ X , metric = 'precomputed' , algorithm = 'prims_kdtree' )
376+ assert_raises (ValueError ,
377+ hdbscan ,
378+ X , metric = 'precomputed' , algorithm = 'prims_balltree' )
379+ assert_raises (ValueError ,
380+ hdbscan ,
381+ X , metric = 'precomputed' ,algorithm = 'boruvka_balltree' )
249382 assert_raises (ValueError ,
250383 hdbscan ,
251384 X , alpha = - 1 )
@@ -258,6 +391,9 @@ def test_hdbscan_badargs():
258391 assert_raises (TypeError ,
259392 hdbscan ,
260393 X , metric = 'minkowski' , p = None )
394+ assert_raises (ValueError ,
395+ hdbscan ,
396+ X , leaf_size = 0 )
261397
262398def test_hdbscan_sparse ():
263399
0 commit comments