Skip to content

Commit 4b3ebea

Browse files
committed
More boruvka docs.
1 parent 46d1c62 commit 4b3ebea

File tree

1 file changed

+6
-0
lines changed

1 file changed

+6
-0
lines changed

hdbscan/_hdbscan_boruvka.pyx

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,9 @@ cdef class KDTreeBoruvkaAlgorithm (object):
351351
cdef np.ndarray[np.double_t, ndim=2] knn_dist
352352
cdef np.ndarray[np.intp_t, ndim=2] knn_indices
353353

354+
# A shortcut: if we have a lot of points then we can split the points into
355+
# four piles and query them in parallel. On multicore systems (most systems)
356+
# this amounts to a 2x-3x wall-clock improvement.
354357
if self.tree.data.shape[0] > 16384:
355358
datasets = [np.asarray(self.tree.data[0:self.num_points//4]),
356359
np.asarray(self.tree.data[self.num_points//4:self.num_points//2]),
@@ -370,6 +373,9 @@ cdef class KDTreeBoruvkaAlgorithm (object):
370373
self.core_distance_arr = knn_dist[:, self.min_samples - 1].copy()
371374
self.core_distance = (<np.double_t [:self.num_points:1]> (<np.double_t *> self.core_distance_arr.data))
372375

376+
# Since we do everything in terms of rdist (reduced distance) to free up the GIL,
377+
# we need to convert all the core distances beforehand
378+
# to make comparison feasible.
373379
for n in range(self.num_points):
374380
self.core_distance[n] = self.dist._dist_to_rdist(self.core_distance[n])
375381

0 commit comments

Comments
 (0)