@@ -131,20 +131,32 @@ def _hdbscan_sparse_distance_matrix(X, min_samples=5, alpha=1.0,
131
131
metric = 'minkowski' , p = 2 , leaf_size = 40 ,
132
132
gen_min_span_tree = False , ** kwargs ):
133
133
assert issparse (X )
134
+ # Check for connected component on X
135
+ if csgraph .connected_components (X , directed = False , return_labels = False ) > 1 :
136
+ raise ValueError ('Sparse distance matrix has multiple connected '
137
+ 'components!\n That is, there exist groups of points '
138
+ 'that are completely disjoint -- there are no distance '
139
+ 'relations connecting them\n '
140
+ 'Run hdbscan on each component.' )
134
141
135
142
lil_matrix = X .tolil ()
136
143
137
144
# Compute sparse mutual reachability graph
145
+ # if max_dist > 0, max distance to use when the reachability is infinite
146
+ max_dist = kwargs .get ("max_dist" , 0. )
138
147
mutual_reachability_ = sparse_mutual_reachability (lil_matrix ,
139
- min_points = min_samples )
140
-
148
+ min_points = min_samples ,
149
+ max_dist = max_dist )
150
+ # Check connected component on mutual reachability
151
+ # If more than one component, it means that even if the distance matrix X
152
+ # has one component, there exist points with fewer than `min_samples` neighbors
141
153
if csgraph .connected_components (mutual_reachability_ , directed = False ,
142
154
return_labels = False ) > 1 :
143
- raise ValueError ('Sparse distance matrix has multiple connected '
144
- ' components! \n That is, there exist groups of points '
145
- 'that are completely disjoint -- there are no distance '
146
- 'relations connecting them \n '
147
- 'Run hdbscan on each component.' )
155
+ raise ValueError (( 'There exists points with less than %s neighbors. '
156
+ 'Ensure your distance matrix has non zeros values for '
157
+ 'at least `min_sample`=%s neighbors for each points (i.e. K-nn graph), '
158
+ 'or specify a `max_dist` to use when distances are missing.' )
159
+ % ( min_samples , min_samples ) )
148
160
149
161
# Compute the minimum spanning tree for the sparse graph
150
162
sparse_min_spanning_tree = csgraph .minimum_spanning_tree (
0 commit comments