Skip to content

Commit 44ca750

Browse files
committed
Set the Boruvka tree leaf size to the primary leaf size divided by 3 (heuristic).
1 parent fbff154 commit 44ca750

File tree

3 files changed

+32
-20
lines changed

3 files changed

+32
-20
lines changed

hdbscan/_hdbscan_boruvka.c

Lines changed: 26 additions & 14 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

hdbscan/_hdbscan_boruvka.pyx

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -159,10 +159,10 @@ cdef class KDTreeBoruvkaAlgorithm (object):
159159
cdef np.ndarray candidate_neighbor_arr
160160
cdef np.ndarray candidate_distance_arr
161161

162-
def __init__(self, tree, min_samples=5, metric='euclidean', **kwargs):
162+
def __init__(self, tree, min_samples=5, metric='euclidean', leaf_size=20, **kwargs):
163163

164164
self.core_dist_tree = tree
165-
self.tree = KDTree(tree.data, metric=metric, leaf_size=10)
165+
self.tree = KDTree(tree.data, metric=metric, leaf_size=leaf_size)
166166
self._data = np.array(self.tree.data)
167167
self._raw_data = self.tree.data
168168
self.node_bounds = self.tree.node_bounds
@@ -539,10 +539,10 @@ cdef class BallTreeBoruvkaAlgorithm (object):
539539
cdef np.ndarray candidate_neighbor_arr
540540
cdef np.ndarray candidate_distance_arr
541541

542-
def __init__(self, tree, min_samples=5, metric='euclidean', **kwargs):
542+
def __init__(self, tree, min_samples=5, metric='euclidean', leaf_size=20, **kwargs):
543543

544544
self.core_dist_tree = tree
545-
self.tree = BallTree(tree.data, metric=metric, leaf_size=10)
545+
self.tree = BallTree(tree.data, metric=metric, leaf_size=leaf_size)
546546
self._data = np.array(self.tree.data)
547547
self._raw_data = self.tree.data
548548
self.min_samples = min_samples

hdbscan/hdbscan_.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ def _hdbscan_boruvka_kdtree(X, min_cluster_size=5, min_samples=None, alpha=1.0,
143143
min_samples = min(dim - 1, min_samples)
144144

145145
tree = KDTree(X, metric=metric, leaf_size=leaf_size)
146-
alg = KDTreeBoruvkaAlgorithm(tree, min_samples, metric=metric)
146+
alg = KDTreeBoruvkaAlgorithm(tree, min_samples, metric=metric, leaf_size=leaf_size//3)
147147
min_spanning_tree = alg.spanning_tree()
148148

149149
return _tree_to_labels(X, min_spanning_tree, min_cluster_size) + (min_spanning_tree,)
@@ -156,7 +156,7 @@ def _hdbscan_boruvka_balltree(X, min_cluster_size=5, min_samples=None, alpha=1.0
156156
min_samples = min(dim - 1, min_samples)
157157

158158
tree = BallTree(X, metric=metric, leaf_size=leaf_size)
159-
alg = BallTreeBoruvkaAlgorithm(tree, min_samples, metric=metric)
159+
alg = BallTreeBoruvkaAlgorithm(tree, min_samples, metric=metric, leaf_size=leaf_size//3)
160160
min_spanning_tree = alg.spanning_tree()
161161

162162
return _tree_to_labels(X, min_spanning_tree, min_cluster_size) + (min_spanning_tree,)

0 commit comments

Comments
 (0)