@@ -401,6 +401,17 @@ class HDBSCAN(BaseEstimator, ClusterMixin):
401401 See (K. Chaudhuri and S. Dasgupta "Rates of convergence
402402 for the cluster tree."). (default 1.0)
403403
404+ algorithm : string, optional
405+ Exactly which algorithm to use; hdbscan has variants specialised
406+ for different characteristics of the data. By default this is set
407+ to ``best`` which chooses the "best" algorithm given the nature of
408+ the data. You can force other options if you believe you know
409+ better. The default is ``best``; the other options are:
410+ * ``small``
411+ * ``small_kdtree``
412+ * ``large_kdtree``
413+ * ``large_kdtree_fastcluster``
414+ * ``large_kdtree_low_memory``
404415
405416 gen_min_span_tree : bool, optional
406417 Whether to generate the minimum spanning tree with regard
@@ -442,13 +453,15 @@ class HDBSCAN(BaseEstimator, ClusterMixin):
442453 """
443454
444455 def __init__ (self , min_cluster_size = 5 , min_samples = None ,
445- metric = 'euclidean' , alpha = 1.0 , p = None , gen_min_span_tree = False ):
456+ metric = 'euclidean' , alpha = 1.0 , p = None ,
457+ algorithm = 'best' , gen_min_span_tree = False ):
446458 self .min_cluster_size = min_cluster_size
447459 self .min_samples = min_samples
448460 self .alpha = alpha
449461
450462 self .metric = metric
451463 self .p = p
464+ self .algorithm = algorithm
452465 self .gen_min_span_tree = gen_min_span_tree
453466
454467 self ._condensed_tree = None
0 commit comments