@@ -149,7 +149,7 @@ def _hdbscan_prims_balltree(X, min_samples=5, alpha=1.0,
149149
150150def _hdbscan_boruvka_kdtree (X , min_samples = 5 , alpha = 1.0 ,
151151 metric = 'minkowski' , p = 2 , leaf_size = 40 ,
152- approx_min_span_tree = False ,
152+ approx_min_span_tree = True ,
153153 gen_min_span_tree = False ):
154154 if metric == 'minkowski' :
155155 if p is None :
@@ -176,7 +176,7 @@ def _hdbscan_boruvka_kdtree(X, min_samples=5, alpha=1.0,
176176
177177def _hdbscan_boruvka_balltree (X , min_samples = 5 , alpha = 1.0 ,
178178 metric = 'minkowski' , p = 2 , leaf_size = 40 ,
179- approx_min_span_tree = False ,
179+ approx_min_span_tree = True ,
180180 gen_min_span_tree = False ):
181181 if metric == 'minkowski' :
182182 if p is None :
@@ -204,7 +204,7 @@ def _hdbscan_boruvka_balltree(X, min_samples=5, alpha=1.0,
204204def hdbscan (X , min_cluster_size = 5 , min_samples = None , alpha = 1.0 ,
205205 metric = 'minkowski' , p = 2 , leaf_size = 40 ,
206206 algorithm = 'best' , memory = Memory (cachedir = None , verbose = 0 ),
207- approx_min_span_tree = False , gen_min_span_tree = False ):
207+ approx_min_span_tree = True , gen_min_span_tree = False ):
208208 """Perform HDBSCAN clustering from a vector array or distance matrix.
209209
210210 Parameters
@@ -259,12 +259,13 @@ def hdbscan(X, min_cluster_size=5, min_samples=None, alpha=1.0,
259259 By default, no caching is done. If a string is given, it is the
260260 path to the caching directory.
261261
262- approx_min_span_tree : Bool ( optional)
262+ approx_min_span_tree : Bool, optional
263263 Whether to accept an only approximate minimum spanning tree.
264264 For some algorithms this can provide a significant speedup, but
265- the resulting clustering is of lower quality, and may have issues.
266- If you are willing to sacrifice correctness for speed you may want
267- to explore this; in general this should be left at the default False.
265+ the resulting clustering may be of marginally lower quality.
266+ If you are willing to sacrifice speed for correctness, set this to
267+ False; in general it should be left at the default True.
268+ (default True)
268269
269270 gen_min_span_tree : bool, optional
270271 Whether to generate the minimum spanning tree for later analysis.
@@ -276,7 +277,8 @@ def hdbscan(X, min_cluster_size=5, min_samples=None, alpha=1.0,
276277 Cluster labels for each point. Noisy samples are given the label -1.
277278
278279 probabilities : array [n_samples]
279- Cluster membership strengths for each point. Noisy samples are assigned 0.
280+ Cluster membership strengths for each point. Noisy samples are assigned
281+ 0.
280282
281283 condensed_tree : record array
282284 The condensed cluster hierarchy used to generate clusters.
@@ -459,10 +461,10 @@ class HDBSCAN(BaseEstimator, ClusterMixin):
459461 approx_min_span_tree : Bool, optional
460462 Whether to accept an only approximate minimum spanning tree.
461463 For some algorithms this can provide a significant speedup, but
462- the resulting clustering is of lower quality, and may have issues .
463- If you are willing to sacrifice correctness for speed you may want
464- to explore this; in general this should be left at the default False .
465- (default False )
464+ the resulting clustering may be of marginally lower quality.
465+ If you are willing to sacrifice speed for correctness, set this to
466+ False; in general it should be left at the default True.
467+ (default True)
466468
467469 gen_min_span_tree: bool, optional
468470 Whether to generate the minimum spanning tree with regard
@@ -516,7 +518,7 @@ def __init__(self, min_cluster_size=5, min_samples=None,
516518 metric = 'euclidean' , alpha = 1.0 , p = None ,
517519 algorithm = 'best' , leaf_size = 40 ,
518520 memory = Memory (cachedir = None , verbose = 0 ),
519- approx_min_span_tree = False ,
521+ approx_min_span_tree = True ,
520522 gen_min_span_tree = False ):
521523 self .min_cluster_size = min_cluster_size
522524 self .min_samples = min_samples
0 commit comments