Skip to content

Commit f821f28

Browse files
authored
Merge pull request #83 from MattScicluna/use_distance_in_graph
Use distance in graph
2 parents e8d8038 + fdfa6c5 commit f821f28

File tree

2 files changed: 18 additions and 13 deletions.

graphtools/graphs.py

Lines changed: 2 additions & 2 deletions

@@ -1205,11 +1205,11 @@ def build_landmark_op(self):
                 self.data if not hasattr(self, "data_nu") else self.data_nu
             )  # because of the scaling to review
             if (
-                n_samples > 5000
+                n_samples > 5000 and self.distance == "euclidean"
             ):  # sklearn.euclidean_distances is faster than cdist for big dataset
                 distances = euclidean_distances(data, data[landmark_indices])
             else:
-                distances = cdist(data, data[landmark_indices], metric="euclidean")
+                distances = cdist(data, data[landmark_indices], metric=self.distance)
             self._clusters = np.argmin(distances, axis=1)
 
         else:

test/test_landmark.py

Lines changed: 16 additions & 11 deletions

@@ -284,29 +284,34 @@ def test_landmark_with_non_euclidean_distances():
     # "but all distance metrics gave identical results"
     # )
 
-    # Test that the landmark operators are different shapes/values when different distances
-    # are used (this is a more sensitive test than just cluster assignments)
-    euclidean_landmark_sum = np.sum(euclidean_G.landmark_op)
-    manhattan_landmark_sum = np.sum(G_manhattan.landmark_op)
-    cosine_landmark_sum = np.sum(G_cosine.landmark_op)
+    # Compare landmark operators using Frobenius norm
+    euclidean_landmark_op = euclidean_G.landmark_op
+    manhattan_landmark_op = G_manhattan.landmark_op
+    cosine_landmark_op = G_cosine.landmark_op
+
+    diff_euclidean_manhattan = np.linalg.norm(
+        euclidean_landmark_op - manhattan_landmark_op, "fro"
+    )
+    diff_euclidean_cosine = np.linalg.norm(
+        euclidean_landmark_op - cosine_landmark_op, "fro"
+    )
 
     print(
-        f"Landmark operator sums: euclidean={euclidean_landmark_sum:.6f}, "
-        f"manhattan={manhattan_landmark_sum:.6f}, cosine={cosine_landmark_sum:.6f}"
+        f"Landmark operator differences: "
+        f"euclidean vs manhattan={diff_euclidean_manhattan:.6f}, "
+        f"euclidean vs cosine={diff_euclidean_cosine:.6f}"
     )
 
-    # The landmark operators should be different when using different distance metrics
     operators_different = (
-        abs(euclidean_landmark_sum - manhattan_landmark_sum) > 1e-10
-        or abs(euclidean_landmark_sum - cosine_landmark_sum) > 1e-10
+        diff_euclidean_manhattan > 1e-6 or diff_euclidean_cosine > 1e-6
     )
 
     if not operators_different:
         import warnings
 
         warnings.warn(
             "Landmark operators are identical across different distance metrics. "
-            "This strongly suggests the distance parameter is being ignored in build_landmark_op.",
+            "This suggests the distance parameter is being ignored in build_landmark_op.",
             UserWarning,
         )
 

0 commit comments

Comments
 (0)