@@ -649,24 +649,21 @@ def build_landmark_op(self):
 
 
         """
-        if self.random_landmarking:
-            with _logger.log_task("landmark operator"):
-                is_sparse = sparse.issparse(self.kernel)
+        with _logger.log_task("landmark operator"):
+            is_sparse = sparse.issparse(self.kernel)
+
+            if self.random_landmark:
                 n_samples = self.data.shape[0]
                 rng = np.random.default_rng(self.random_state)
                 landmark_indices = rng.choice(n_samples, self.n_landmark, replace=False)
-                data = self.data if not hasattr(self, 'data_nu') else self.data_nu  # because of the scaling to review
-                distances = cdist(data, data[landmark_indices], metric="euclidean")
-                if n_samples > 5000:  # sklearn.euclidean_distances is faster than cdist for big datasets
-                    distances = euclidean_distances(data, data[landmark_indices])
-                else:
-                    distances = cdist(data, data[landmark_indices], metric="euclidean")
+                data = self.data if not hasattr(self, 'data_nu') else self.data_nu
+                # future optimization: sklearn's euclidean_distances is faster than cdist on large datasets
+                # if n_samples > 5000 and self.distance == "euclidean":
+                #     distances = euclidean_distances(data, data[landmark_indices])
+                distances = cdist(data, data[landmark_indices], metric=self.distance)
                 self._clusters = np.argmin(distances, axis=1)
 
-        else:
-            with _logger.log_task("landmark operator"):
-                is_sparse = sparse.issparse(self.kernel)
-                # spectral clustering
+            else:
                 with _logger.log_task("SVD"):
                     _, _, VT = randomized_svd(
                         self.diff_aff,
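
Reviewer note: a minimal standalone sketch of what the new `random_landmark` branch computes, using toy stand-ins for `self.data`, `self.n_landmark`, and `self.random_state` rather than the class attributes themselves:

```python
import numpy as np
from scipy.spatial.distance import cdist

# Toy stand-ins for self.data / self.n_landmark / self.random_state.
rng = np.random.default_rng(42)
data = rng.normal(size=(1000, 20))
n_landmark = 50

# Pick landmarks uniformly at random, then label every sample with the
# index of its nearest landmark (the diff stores this in self._clusters).
landmark_indices = rng.choice(data.shape[0], n_landmark, replace=False)
distances = cdist(data, data[landmark_indices], metric="euclidean")
clusters = np.argmin(distances, axis=1)

assert clusters.shape == (1000,)
assert clusters.max() < n_landmark
```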
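On the deferred optimization: the comment's speed claim is easy to check with a rough timing sketch. The 5000-sample threshold comes from the diff's comment; timings vary by machine and BLAS build, so treat this as a way to validate the threshold, not as a benchmark result:

```python
import time

import numpy as np
from scipy.spatial.distance import cdist
from sklearn.metrics.pairwise import euclidean_distances

rng = np.random.default_rng(0)
data = rng.normal(size=(10_000, 50))
landmarks = data[rng.choice(data.shape[0], 100, replace=False)]

t0 = time.perf_counter()
d_scipy = cdist(data, landmarks, metric="euclidean")
t1 = time.perf_counter()
d_sklearn = euclidean_distances(data, landmarks)
t2 = time.perf_counter()

print(f"cdist:               {t1 - t0:.3f}s")
print(f"euclidean_distances: {t2 - t1:.3f}s")

# sklearn expands (x - y)^2 = x^2 - 2xy + y^2, which is faster but slightly
# less accurate than scipy's direct computation; results agree to tolerance.
assert np.allclose(d_scipy, d_sklearn, atol=1e-6)
```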