Commit 5d9c9d0
Author: Mohamed-Elyes Kanoun (committed)
Message: Adding random landmarking -
1 parent e169ec5, commit 5d9c9d0

File tree: 1 file changed (+10, -21 lines)

graphtools/graphs.py

Lines changed: 10 additions & 21 deletions
@@ -638,26 +638,15 @@ def _data_transitions(self):
     def build_landmark_op(self):
         """Build the landmark operator
 
-        Randomly selects n_landmark points as landmarks, then assigns each point to its nearest landmark.
-
-        with _logger.log_task("landmark operator"):
-            is_sparse = sparse.issparse(self.kernel)
-            # spectral clustering
-            with _logger.log_task("SVD"):
-                _, _, VT = randomized_svd(
-                    self.diff_aff,
-                    n_components=self.n_svd,
-                    random_state=self.random_state,
-                )
-            with _logger.log_task("KMeans"):
-                kmeans = MiniBatchKMeans(
-                    self.n_landmark,
-                    init_size=3 * self.n_landmark,
-                    n_init=1,
-                    batch_size=10000,
-                    random_state=self.random_state,
-                )
-                self._clusters = kmeans.fit_predict(self.diff_op.dot(VT.T))
+
+        Calculates spectral clusters on the kernel, and calculates transition
+        probabilities between cluster centers by using transition probabilities
+        between samples assigned to each cluster.
+
+        random_landmarking:
+        This method randomly selects n_landmark points and assigns each sample to its nearest landmark
+        using Euclidean distance.
+
 
         """
         if self.random_landmarking :
@@ -666,7 +655,7 @@ def build_landmark_op(self):
             n_samples = self.data.shape[0]
             rng = np.random.default_rng(self.random_state)
             landmark_indices = rng.choice(n_samples, self.n_landmark, replace=False)
-            data = self.data if not hasattr(self, 'data_nu') else self.data_nu
+            data = self.data if not hasattr(self, 'data_nu') else self.data_nu  # because of the scaling to review
             distances = cdist(data, data[landmark_indices], metric="euclidean")
             if n_samples > 5000:  # sklearn.euclidean_distances is faster than cdist for big dataset
                 distances = euclidean_distances(data, data[landmark_indices])
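
For context, the random-landmarking step added by this commit can be exercised on its own. The sketch below is illustrative and not part of graphtools: the helper name assign_random_landmarks, the argmin assignment step, and the toy data are assumptions; only the uniform random choice of n_landmark rows and the cdist / euclidean_distances computation mirror the code in the diff above.

import numpy as np
from scipy.spatial.distance import cdist
from sklearn.metrics.pairwise import euclidean_distances


def assign_random_landmarks(data, n_landmark, random_state=None):
    """Hypothetical standalone helper: pick n_landmark rows at random and
    assign every sample to its nearest landmark by Euclidean distance."""
    n_samples = data.shape[0]
    rng = np.random.default_rng(random_state)
    landmark_indices = rng.choice(n_samples, n_landmark, replace=False)

    # The commit switches to sklearn's euclidean_distances above 5000 samples,
    # where it tends to be faster than scipy's cdist.
    if n_samples > 5000:
        distances = euclidean_distances(data, data[landmark_indices])
    else:
        distances = cdist(data, data[landmark_indices], metric="euclidean")

    # Nearest landmark per sample; this assignment step is assumed here,
    # since the diff does not show how `distances` is consumed afterwards.
    clusters = distances.argmin(axis=1)
    return landmark_indices, clusters


# Toy usage: 2000 samples, 100 random landmarks.
X = np.random.default_rng(0).normal(size=(2000, 10))
landmarks, clusters = assign_random_landmarks(X, n_landmark=100, random_state=42)
print(landmarks.shape, clusters.shape)  # (100,), (2000,)

Compared with the removed spectral path (randomized_svd followed by MiniBatchKMeans on the diffusion operator), this approach skips clustering entirely: landmarks are chosen uniformly at random and assignment reduces to one n_samples x n_landmark distance matrix plus an argmin, which is simpler and typically cheaper to build, at the cost of landmarks that are not informed by the diffusion geometry.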
