@@ -131,20 +131,32 @@ def _hdbscan_sparse_distance_matrix(X, min_samples=5, alpha=1.0,
131131 metric = 'minkowski' , p = 2 , leaf_size = 40 ,
132132 gen_min_span_tree = False , ** kwargs ):
133133 assert issparse (X )
134+ # Check for connected component on X
135+ if csgraph .connected_components (X , directed = False , return_labels = False ) > 1 :
136+ raise ValueError ('Sparse distance matrix has multiple connected '
137+ 'components!\n That is, there exist groups of points '
138+ 'that are completely disjoint -- there are no distance '
139+ 'relations connecting them\n '
140+ 'Run hdbscan on each component.' )
134141
135142 lil_matrix = X .tolil ()
136143
137144 # Compute sparse mutual reachability graph
145+ # if max_dist > 0, max distance to use when the reachability is infinite
146+ max_dist = kwargs .get ("max_dist" , 0. )
138147 mutual_reachability_ = sparse_mutual_reachability (lil_matrix ,
139- min_points = min_samples )
140-
148+ min_points = min_samples ,
149+ max_dist = max_dist )
150+ # Check connected component on mutual reachability
151+ # If more than one component, it means that even if the distance matrix X
152+ # has one component, there exist points with fewer than `min_samples` neighbors
141153 if csgraph .connected_components (mutual_reachability_ , directed = False ,
142154 return_labels = False ) > 1 :
143- raise ValueError ('Sparse distance matrix has multiple connected '
144- ' components! \n That is, there exist groups of points '
145- 'that are completely disjoint -- there are no distance '
146- 'relations connecting them \n '
147- 'Run hdbscan on each component.' )
155+ raise ValueError (( 'There exists points with less than %s neighbors. '
156+ 'Ensure your distance matrix has non zeros values for '
157+ 'at least `min_sample`=%s neighbors for each points (i.e. K-nn graph), '
158+ 'or specify a `max_dist` to use when distances are missing.' )
159+ % ( min_samples , min_samples ) )
148160
149161 # Compute the minimum spanning tree for the sparse graph
150162 sparse_min_spanning_tree = csgraph .minimum_spanning_tree (
0 commit comments