@@ -638,26 +638,15 @@ def _data_transitions(self):
638638 def build_landmark_op (self ):
639639 """Build the landmark operator
640640
641- Sélectionne aléatoirement n_landmark points comme landmarks, puis assigne chaque point à son landmark le plus proche.
642-
643- with _logger.log_task("landmark operator"):
644- is_sparse = sparse.issparse(self.kernel)
645- # spectral clustering
646- with _logger.log_task("SVD"):
647- _, _, VT = randomized_svd(
648- self.diff_aff,
649- n_components=self.n_svd,
650- random_state=self.random_state,
651- )
652- with _logger.log_task("KMeans"):
653- kmeans = MiniBatchKMeans(
654- self.n_landmark,
655- init_size=3 * self.n_landmark,
656- n_init=1,
657- batch_size=10000,
658- random_state=self.random_state,
659- )
660- self._clusters = kmeans.fit_predict(self.diff_op.dot(VT.T))
641+
642+ Calculates spectral clusters on the kernel, and calculates transition
643+ probabilities between cluster centers by using transition probabilities
644+ between samples assigned to each cluster.
645+
646+ random_landmarking:
647+ This method randomly selects n_landmark points and assigns each sample to its nearest landmark
648+      using Euclidean distance.
649+
661650
662651 """
663652 if self .random_landmarking :
@@ -666,7 +655,7 @@ def build_landmark_op(self):
666655 n_samples = self .data .shape [0 ]
667656 rng = np .random .default_rng (self .random_state )
668657 landmark_indices = rng .choice (n_samples , self .n_landmark , replace = False )
669- data = self .data if not hasattr (self , 'data_nu' ) else self .data_nu
658+             data = self .data if not hasattr (self , 'data_nu' ) else self .data_nu  # NOTE(review): prefers normalized data (data_nu) when present — confirm this scaling is intended
670659 distances = cdist (data , data [landmark_indices ], metric = "euclidean" )
671660             if n_samples > 5000 :  # sklearn euclidean_distances is faster than cdist for large datasets
672661 distances = euclidean_distances (data , data [landmark_indices ])
0 commit comments