@@ -1201,16 +1201,45 @@ def build_landmark_op(self):
             n_samples = self.data.shape[0]
             rng = np.random.default_rng(self.random_state)
             landmark_indices = rng.choice(n_samples, self.n_landmark, replace=False)
-            data = (
-                self.data if not hasattr(self, "data_nu") else self.data_nu
-            )  # data_nu holds the rescaled data (scaling still to be reviewed)
-            if (
-                n_samples > 5000 and self.distance == "euclidean"
-            ):  # sklearn's euclidean_distances is faster than cdist on large datasets
-                distances = euclidean_distances(data, data[landmark_indices])
+            precomputed = getattr(self, "precomputed", None)
+
+            if precomputed is not None:
+                # Use the precomputed affinities/distances directly to avoid the Euclidean fallback
+                landmark_affinities = self.kernel[:, landmark_indices]
+
+                if sparse.issparse(landmark_affinities):
+                    landmark_affinities = landmark_affinities.tocsr()
+                    cluster_assignments = np.asarray(
+                        landmark_affinities.argmax(axis=1)
+                    ).reshape(-1)
+                    row_max = matrix.to_array(
+                        landmark_affinities.max(axis=1)
+                    ).reshape(-1)
+                else:
+                    landmark_affinities = np.asarray(landmark_affinities)
+                    cluster_assignments = np.argmax(landmark_affinities, axis=1)
+                    row_max = np.max(landmark_affinities, axis=1)
+
+                if np.any(row_max == 0):
+                    warnings.warn(
+                        "Some samples have zero affinity to all randomly selected landmarks; "
+                        "increase n_landmark or ensure the affinity matrix connects all points.",
+                        RuntimeWarning,
+                    )
+                self._clusters = cluster_assignments
             else:
-                distances = cdist(data, data[landmark_indices], metric=self.distance)
-            self._clusters = np.argmin(distances, axis=1)
+                data = (
+                    self.data if not hasattr(self, "data_nu") else self.data_nu
+                )  # data_nu holds the rescaled data (scaling still to be reviewed)
+                if (
+                    n_samples > 5000 and self.distance == "euclidean"
+                ):  # sklearn's euclidean_distances is faster than cdist on large datasets
+                    distances = euclidean_distances(data, data[landmark_indices])
+                else:
+                    distances = cdist(
+                        data, data[landmark_indices], metric=self.distance
+                    )
+                self._clusters = np.argmin(distances, axis=1)
 
         else:
             with _logger.log_task("SVD"):
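
A minimal standalone sketch of why the `row_max == 0` warning above is needed, assuming only NumPy and SciPy (the synthetic `affinities` matrix and the variable names are illustrative, not part of this diff): on a CSR matrix, `argmax` over a row with no stored entries returns index 0, so a sample disconnected from every landmark would otherwise be silently assigned to the first landmark.

```python
import numpy as np
from scipy import sparse

rng = np.random.default_rng(42)
n_samples, n_landmark = 8, 3

# Synthetic sparse affinity matrix; row 2 is fully disconnected (all zeros).
affinities = sparse.random(
    n_samples, n_samples, density=0.4, random_state=42, format="lil"
)
affinities[2, :] = 0
affinities = affinities.tocsr()

landmark_indices = rng.choice(n_samples, n_landmark, replace=False)
landmark_affinities = affinities[:, landmark_indices].tocsr()

# CSR argmax returns 0 for an all-zero row, silently assigning the
# disconnected sample to the first landmark -- hence the warning.
cluster_assignments = np.asarray(landmark_affinities.argmax(axis=1)).reshape(-1)
row_max = landmark_affinities.max(axis=1).toarray().reshape(-1)

print(cluster_assignments)           # row 2 gets index 0 despite zero affinity
print(np.flatnonzero(row_max == 0))  # always includes row 2; these rows trigger the warning
```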
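And a quick sanity check for the retained fallback branch, assuming scikit-learn and SciPy are installed: `euclidean_distances` uses the expanded quadratic form, which is faster on large inputs but slightly less precise than `cdist`; rounding and exact ties aside, the resulting landmark assignments match, which is why the two backends are interchangeable here.

```python
import numpy as np
from scipy.spatial.distance import cdist
from sklearn.metrics.pairwise import euclidean_distances

rng = np.random.default_rng(0)
data = rng.normal(size=(1000, 20))
landmarks = data[rng.choice(1000, 50, replace=False)]

# Both backends yield the same nearest-landmark assignments on generic data.
fast = np.argmin(euclidean_distances(data, landmarks), axis=1)
exact = np.argmin(cdist(data, landmarks, metric="euclidean"), axis=1)
assert np.array_equal(fast, exact)
```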