@@ -71,6 +71,11 @@ def _hdbscan_generic(X, min_samples=5, alpha=1.0, metric='minkowski', p=2,
7171 distance_matrix = pairwise_distances (X , metric = metric , p = p )
7272 elif metric == 'arccos' :
7373 distance_matrix = pairwise_distances (X , metric = 'cosine' , ** kwargs )
74+ elif metric == 'precomputed' :
75+ # Treating this case explicitly instead of letting sklearn.metrics.pairwise_distances handle it enables
76+ # the usage of numpy.inf in the distance matrix to indicate missing information.
77+ # TODO: Check if copying is necessary
78+ distance_matrix = X .copy ()
7479 else :
7580 distance_matrix = pairwise_distances (X , metric = metric , ** kwargs )
7681
@@ -282,6 +287,14 @@ def _hdbscan_boruvka_balltree(X, min_samples=5, alpha=1.0,
282287 return single_linkage_tree , None
283288
284289
def check_precomputed_distance_matrix(X):
    """Validate a precomputed distance matrix that may contain infinities.

    Infinite entries (``numpy.inf``) are used in a precomputed distance
    matrix to indicate missing information.  ``check_array`` is therefore
    run on a temporary copy in which every infinite entry has been
    replaced by ``1``; all other validation performed by ``check_array``
    (including its handling of NaN) still applies.

    Parameters
    ----------
    X : array-like
        The precomputed distance matrix.  It is never modified.  Plain
        sequences such as nested lists are accepted and converted to an
        ndarray for the purpose of the check.

    Returns
    -------
    None
        The sanitised copy is deliberately not returned: it no longer
        carries the infinite entries, so it must not be used in place
        of ``X``.

    Raises
    ------
    Whatever ``check_array`` raises for the sanitised copy of ``X``.
    """
    # Work on a copy so the caller's matrix keeps its infinite entries.
    # Non-ndarray input (e.g. a list of lists) is converted first, since
    # a plain sequence cannot be indexed with a boolean mask.
    if isinstance(X, np.ndarray):
        tmp = X.copy()
    else:
        tmp = np.array(X)

    # np.isinf flags both +inf and -inf; any finite placeholder would do.
    tmp[np.isinf(tmp)] = 1
    check_array(tmp)
296+
297+
285298def hdbscan (X , min_cluster_size = 5 , min_samples = None , alpha = 1.0 ,
286299 metric = 'minkowski' , p = 2 , leaf_size = 40 ,
287300 algorithm = 'best' , memory = Memory (cachedir = None , verbose = 0 ),
@@ -464,7 +477,10 @@ def hdbscan(X, min_cluster_size=5, min_samples=None, alpha=1.0,
464477 'Should be one of: "eom", "leaf"\n ' )
465478
466479 # Checks input and converts to an nd-array where possible
467- X = check_array (X , accept_sparse = 'csr' )
480+ if metric != 'precomputed' :
481+ X = check_array (X , accept_sparse = 'csr' )
482+ else :
483+ check_precomputed_distance_matrix (X )
468484 # Python 2 and 3 compliant string_type checking
469485 if isinstance (memory , six .string_types ):
470486 memory = Memory (cachedir = memory , verbose = 0 )
@@ -798,9 +814,11 @@ def fit(self, X, y=None):
798814 self : object
799815 Returns self
800816 """
801- X = check_array (X , accept_sparse = 'csr' )
802817 if self .metric != 'precomputed' :
818+ X = check_array (X , accept_sparse = 'csr' )
803819 self ._raw_data = X
820+ else :
821+ check_precomputed_distance_matrix (X )
804822
805823 kwargs = self .get_params ()
806824 # prediction data only applies to the persistent model, so remove
0 commit comments