@@ -49,7 +49,7 @@ def _tree_to_labels(X, single_linkage_tree, min_cluster_size=10,
4949 cluster_selection_method = 'eom' ,
5050 allow_single_cluster = False ,
5151 match_reference_implementation = False ,
52- epsilon = None ):
52+ cluster_selection_epsilon = 0.0 ):
5353 """Converts a pretrained tree and cluster size into a
5454 set of labels and probabilities.
5555 """
@@ -61,7 +61,7 @@ def _tree_to_labels(X, single_linkage_tree, min_cluster_size=10,
6161 cluster_selection_method ,
6262 allow_single_cluster ,
6363 match_reference_implementation ,
64- epsilon )
64+ cluster_selection_epsilon )
6565
6666 return (labels , probabilities , stabilities , condensed_tree ,
6767 single_linkage_tree )
@@ -329,7 +329,7 @@ def check_precomputed_distance_matrix(X):
329329 check_array (tmp )
330330
331331
332- def hdbscan (X , min_cluster_size = 5 , min_samples = None , alpha = 1.0 , epsilon = None ,
332+ def hdbscan (X , min_cluster_size = 5 , min_samples = None , alpha = 1.0 , cluster_selection_epsilon = 0.0 ,
333333 metric = 'minkowski' , p = 2 , leaf_size = 40 ,
334334 algorithm = 'best' , memory = Memory (cachedir = None , verbose = 0 ),
335335 approx_min_span_tree = True , gen_min_span_tree = False ,
@@ -355,8 +355,9 @@ def hdbscan(X, min_cluster_size=5, min_samples=None, alpha=1.0, epsilon=None,
355355 to be considered as a core point. This includes the point itself.
356356 defaults to the min_cluster_size.
357357
358- epsilon: float, optional (default=None)
359- A threshold for cluster splits.
358+ cluster_selection_epsilon : float, optional (default=0.0)
359+ A distance threshold. Clusters below this value will be merged.
360+ See [3]_ for more information.
360361
361362 alpha : float, optional (default=1.0)
362363 A distance scaling parameter as used in robust single linkage.
@@ -476,6 +477,8 @@ def hdbscan(X, min_cluster_size=5, min_samples=None, alpha=1.0, epsilon=None,
476477 cluster tree. In Advances in Neural Information Processing Systems
477478 (pp. 343-351).
478479
480+ .. [3] Malzer, C., & Baum, M. (2019). HDBSCAN(ε^): An Alternative Cluster
481+ Extraction Method for HDBSCAN. arXiv preprint arXiv:1911.02282.
479482 """
480483 if min_samples is None :
481484 min_samples = min_cluster_size
@@ -490,13 +493,10 @@ def hdbscan(X, min_cluster_size=5, min_samples=None, alpha=1.0, epsilon=None,
490493 if min_cluster_size == 1 :
491494 raise ValueError ('Min cluster size must be greater than one' )
492495
493- if epsilon is None :
494- epsilon = 0.0
495-
496- if type (epsilon ) is int :
497- epsilon = float (epsilon )
496+ if type (cluster_selection_epsilon ) is int :
497+ cluster_selection_epsilon = float (cluster_selection_epsilon )
498498
499- if type (epsilon ) is not float or epsilon < 0.0 :
499+ if type (cluster_selection_epsilon ) is not float or cluster_selection_epsilon < 0.0 :
500500 raise ValueError ('Epsilon must be a float value greater than or equal to 0!' )
501501
502502 if not isinstance (alpha , float ) or alpha <= 0.0 :
@@ -631,7 +631,7 @@ def hdbscan(X, min_cluster_size=5, min_samples=None, alpha=1.0, epsilon=None,
631631 cluster_selection_method ,
632632 allow_single_cluster ,
633633 match_reference_implementation ,
634- epsilon ) + \
634+ cluster_selection_epsilon ) + \
635635 (result_min_span_tree ,)
636636
637637
@@ -671,6 +671,10 @@ class HDBSCAN(BaseEstimator, ClusterMixin):
671671 A distance scaling parameter as used in robust single linkage.
672672 See [3]_ for more information.
673673
674+ cluster_selection_epsilon : float, optional (default=0.0)
675+ A distance threshold. Clusters below this value will be merged.
676+ See [5]_ for more information.
677+
674678 algorithm : string, optional (default='best')
675679 Exactly which algorithm to use; hdbscan has variants specialised
676680 for different characteristics of the data. By default this is set
@@ -828,9 +832,12 @@ class HDBSCAN(BaseEstimator, ClusterMixin):
828832 Sander, J., 2014. Density-Based Clustering Validation. In SDM
829833 (pp. 839-847).
830834
835+ .. [5] Malzer, C., & Baum, M. (2019). HDBSCAN(ε^): An Alternative Cluster
836+ Extraction Method for HDBSCAN. arXiv preprint arXiv:1911.02282.
837+
831838 """
832839
833- def __init__ (self , min_cluster_size = 5 , min_samples = None , epsilon = None ,
840+ def __init__ (self , min_cluster_size = 5 , min_samples = None , cluster_selection_epsilon = 0.0 ,
834841 metric = 'euclidean' , alpha = 1.0 , p = None ,
835842 algorithm = 'best' , leaf_size = 40 ,
836843 memory = Memory (cachedir = None , verbose = 0 ),
@@ -844,7 +851,7 @@ def __init__(self, min_cluster_size=5, min_samples=None, epsilon=None,
844851 self .min_cluster_size = min_cluster_size
845852 self .min_samples = min_samples
846853 self .alpha = alpha
847- self .epsilon = epsilon
854+ self .cluster_selection_epsilon = cluster_selection_epsilon
848855 self .metric = metric
849856 self .p = p
850857 self .algorithm = algorithm
0 commit comments