Skip to content

Commit 62fbfea

Browse files
committed
Add different algorithm support to the HDBSCAN object as well as the function.
1 parent 067609f commit 62fbfea

File tree

1 file changed

+14
-1
lines changed

1 file changed

+14
-1
lines changed

hdbscan/hdbscan_.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,17 @@ class HDBSCAN(BaseEstimator, ClusterMixin):
401401
See (K. Chaudhuri and S. Dasgupta "Rates of convergence
402402
for the cluster tree."). (default 1.0)
403403
404+
algorithm : string, optional
405+
Exactly which algorithm to use; hdbscan has variants specialised
406+
for different characteristics of the data. By default this is set
407+
to ``best`` which chooses the "best" algorithm given the nature of
408+
the data. You can force other options if you believe you know
409+
better. Options (in addition to ``best``) are:
410+
* ``small``
411+
* ``small_kdtree``
412+
* ``large_kdtree``
413+
* ``large_kdtree_fastcluster``
414+
* ``large_kdtree_low_memory``
404415
405416
gen_min_span_tree : bool, optional
406417
Whether to generate the minimum spanning tree with regard
@@ -442,13 +453,15 @@ class HDBSCAN(BaseEstimator, ClusterMixin):
442453
"""
443454

444455
def __init__(self, min_cluster_size=5, min_samples=None,
445-
metric='euclidean', alpha=1.0, p=None, gen_min_span_tree=False):
456+
metric='euclidean', alpha=1.0, p=None,
457+
algorithm='best', gen_min_span_tree=False):
446458
self.min_cluster_size = min_cluster_size
447459
self.min_samples = min_samples
448460
self.alpha = alpha
449461

450462
self.metric = metric
451463
self.p = p
464+
self.algorithm = algorithm
452465
self.gen_min_span_tree = gen_min_span_tree
453466

454467
self._condensed_tree = None

0 commit comments

Comments
 (0)