Skip to content

Commit 0cb4104

Browse files
committed
In the case of no internal edges in the MST for DBCV, follow the authors' MATLAB code.
1 parent 4b2b2b2 commit 0cb4104

File tree

1 file changed

+15
-6
lines changed

1 file changed

+15
-6
lines changed

hdbscan/validity.py

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,20 @@ def internal_minimum_spanning_tree(mr_distances):
160160
# then convert back to boolean type.
161161
edge_selection = np.prod(np.in1d(min_span_tree.T[:2], vertices).reshape(
162162
(min_span_tree.shape[0], 2), order='F'), axis=1).astype(bool)
163-
edges = min_span_tree[edge_selection]
163+
164+
# Density sparseness is not well defined if there are no
165+
# internal edges (as per the referenced paper). However
166+
# MATLAB code from the original authors simply selects the
167+
# largest of *all* the edges in the case that there are
168+
# no internal edges, so we do the same here
169+
if np.any(edge_selection):
170+
# If there are any internal edges, then subselect them out
171+
edges = min_span_tree[edge_selection]
172+
else:
173+
# If there are no internal edges then we want to take the
174+
# max over all the edges that exist in the MST, so we simply
175+
# do nothing and return all the edges in the MST.
176+
pass
164177

165178
return vertices, edges
166179

@@ -323,11 +336,7 @@ def validity_index(X, labels, metric='euclidean',
323336

324337
mst_nodes[cluster_id], mst_edges[cluster_id] = \
325338
internal_minimum_spanning_tree(mr_distances)
326-
try:
327-
density_sparseness[cluster_id] = mst_edges[cluster_id].T[2].max()
328-
except ValueError:
329-
raise ValueError('Density Sparseness is not defined when the MST of'
330-
' a cluster has no internal edges!')
339+
density_sparseness[cluster_id] = mst_edges[cluster_id].T[2].max()
331340

332341
for i in range(max_cluster_id):
333342
internal_nodes_i = mst_nodes[i]

0 commit comments

Comments
 (0)