
Commit 4c99fc7

Author: Mohamed-Elyes Kanoun
Message: Adding random landmarking corrections
Parent: 5d9c9d0

File tree: 1 file changed (+10, -13 lines)


graphtools/graphs.py: 10 additions & 13 deletions
@@ -649,24 +649,21 @@ def build_landmark_op(self):
 
 
         """
-        if self.random_landmarking :
-            with _logger.log_task("landmark operator"):
-                is_sparse = sparse.issparse(self.kernel)
+        with _logger.log_task("landmark operator"):
+            is_sparse = sparse.issparse(self.kernel)
+
+            if self.random_landmark:
                 n_samples = self.data.shape[0]
                 rng = np.random.default_rng(self.random_state)
                 landmark_indices = rng.choice(n_samples, self.n_landmark, replace=False)
-                data = self.data if not hasattr(self, 'data_nu') else self.data_nu # because of the scaling to review
-                distances = cdist(data, data[landmark_indices], metric="euclidean")
-                if n_samples > 5000: # sklearn.euclidean_distances is faster than cdist for big dataset
-                    distances = euclidean_distances(data, data[landmark_indices])
-                else:
-                    distances = cdist(data, data[landmark_indices], metric="euclidean")
+                data = self.data if not hasattr(self, 'data_nu') else self.data_nu
+                # if n_samples > 5000 and self.distance == "euclidean":  # sklearn's euclidean_distances is faster than cdist for big datasets
+                #     distances = euclidean_distances(data, data[landmark_indices])
+                # This is a future optimization for the euclidean case.
+                distances = cdist(data, data[landmark_indices], metric=self.distance)
                 self._clusters = np.argmin(distances, axis=1)
 
-        else:
-            with _logger.log_task("landmark operator"):
-                is_sparse = sparse.issparse(self.kernel)
-                # spectral clustering
+            else:
                 with _logger.log_task("SVD"):
                     _, _, VT = randomized_svd(
                         self.diff_aff,
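For readers skimming the diff, the new random_landmark branch amounts to: draw n_landmark row indices without replacement, compute each sample's distance to those landmarks, and assign every sample to its nearest one. The sketch below is an illustrative reconstruction under those assumptions; the function name random_landmark_clusters and its signature are hypothetical, not graphtools' API.

import numpy as np
from scipy.spatial.distance import cdist

def random_landmark_clusters(X, n_landmark, metric="euclidean", random_state=None):
    # Hypothetical helper mirroring the diff's random_landmark branch.
    rng = np.random.default_rng(random_state)
    # pick n_landmark distinct rows of X uniformly at random
    landmark_indices = rng.choice(X.shape[0], n_landmark, replace=False)
    # distances[i, j] = distance from sample i to landmark j
    distances = cdist(X, X[landmark_indices], metric=metric)
    # each sample joins the cluster of its nearest landmark
    return np.argmin(distances, axis=1)

For X of shape (n_samples, n_features) this returns n_samples integer labels in the range [0, n_landmark), matching what the diff stores in self._clusters.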

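The commented-out lines sketch a planned speed-up: when the metric is euclidean and the dataset is large, sklearn's euclidean_distances tends to beat cdist because it computes ||x - y||^2 = ||x||^2 - 2 x.y + ||y||^2 with BLAS-backed matrix products, at a small cost in numerical precision. Below is a minimal dispatcher in that spirit, assuming the commit's 5000-sample threshold (a heuristic from the comment, not a benchmarked constant) and a hypothetical helper name.

import numpy as np
from scipy.spatial.distance import cdist
from sklearn.metrics.pairwise import euclidean_distances

def landmark_distances(data, landmark_indices, metric="euclidean", threshold=5000):
    # Hypothetical dispatcher for the diff's commented-out optimization.
    if metric == "euclidean" and data.shape[0] > threshold:
        # faster on big datasets, slightly less numerically precise
        return euclidean_distances(data, data[landmark_indices])
    # general path: exact distances for any scipy metric string
    return cdist(data, data[landmark_indices], metric=metric)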
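The else branch keeps the pre-existing spectral path, which the hunk truncates mid-call: a randomized SVD of self.diff_aff (the diffusion affinity) followed by clustering in the singular-vector space. The continuation is not shown in this diff, so the following is only a hedged sketch of that general technique, with illustrative parameters and sklearn's KMeans standing in for whatever clusterer the file actually uses.

import numpy as np
from sklearn.cluster import KMeans
from sklearn.utils.extmath import randomized_svd

def spectral_landmark_clusters(diff_aff, n_landmark, n_svd=100, random_state=None):
    # Hypothetical sketch: truncated SVD embedding, then k-means into landmarks.
    _, _, VT = randomized_svd(diff_aff, n_components=n_svd, random_state=random_state)
    embedding = VT.T  # one row per sample, in the top singular-vector space
    kmeans = KMeans(n_clusters=n_landmark, n_init=10, random_state=random_state)
    return kmeans.fit_predict(embedding)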