Skip to content

Commit 56c12a5

Browse files
Merge pull request #4864 from janezd/louvain-cosine
[ENH] Louvain Clustering: Add cosine similarity
2 parents 0cc522f + 3f4b331 commit 56c12a5

File tree

2 files changed

+8
-1
lines changed

2 files changed

+8
-1
lines changed

Orange/clustering/louvain.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,13 @@ def matrix_to_knn_graph(data, k_neighbors, metric, progress_callback=None):
4444
4545
"""
4646
# We do k + 1 because each point is closest to itself, which is not useful
47+
if metric == "cosine":
48+
# Cosine distance on row-normalized data has the same ranking as
49+
# Euclidean distance, so we use the latter, which is more efficient
50+
# because it uses ball trees. We do not need actual distances. If we
51+
# did, the N * k distances could be recomputed later.
52+
data = data / np.linalg.norm(data, axis=1)[:, None]
53+
metric = "euclidean"
4754
knn = NearestNeighbors(n_neighbors=k_neighbors, metric=metric).fit(data)
4855
nearest_neighbors = knn.kneighbors(data, return_distance=False)
4956
# Convert to list of sets so jaccard can be computed efficiently

Orange/widgets/unsupervised/owlouvainclustering.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
_DEFAULT_K_NEIGHBORS = 30
4343

4444

45-
METRICS = [("Euclidean", "l2"), ("Manhattan", "l1")]
45+
METRICS = [("Euclidean", "l2"), ("Manhattan", "l1"), ("Cosine", "cosine")]
4646

4747

4848
class OWLouvainClustering(widget.OWWidget):

0 commit comments

Comments
 (0)