Skip to content

Commit 924ef2f

Browse files
author
Bruno Alano
committed
Removed the cosine distance since it's not a true distance metric. Use the arccos distance instead.
1 parent 103c52b commit 924ef2f

File tree

1 file changed

+11
-11
lines changed

1 file changed

+11
-11
lines changed

hdbscan/dist_metrics.pyx

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1040,20 +1040,20 @@ cdef class HaversineDistance(DistanceMetric):
10401040
#------------------------------------------------------------
10411041
# Cosine Distance
10421042
# D(x, y) = 1 - dot(x, y) / (|x| * |y|)
1043-
# [This is not a true metric, so we will leave it out.]
1043+
# [This is not a true metric (it violates the triangle inequality), so we leave it out. Use the `arccos` distance instead.]
10441044

1045-
cdef class CosineDistance(DistanceMetric):
1046-
cdef inline DTYPE_t dist(self, DTYPE_t* x1, DTYPE_t* x2, ITYPE_t size) nogil except -1:
1047-
cdef DTYPE_t d = 0, norm1 = 0, norm2 = 0
1048-
cdef np.intp_t j
1049-
for j in range(size):
1050-
d += x1[j] * x2[j]
1051-
norm1 += x1[j] * x1[j]
1052-
norm2 += x2[j] * x2[j]
1053-
return 1.0 - d / sqrt(norm1 * norm2)
1045+
#cdef class CosineDistance(DistanceMetric):
1046+
# cdef inline DTYPE_t dist(self, DTYPE_t* x1, DTYPE_t* x2, ITYPE_t size) nogil except -1:
1047+
# cdef DTYPE_t d = 0, norm1 = 0, norm2 = 0
1048+
# cdef np.intp_t j
1049+
# for j in range(size):
1050+
# d += x1[j] * x2[j]
1051+
# norm1 += x1[j] * x1[j]
1052+
# norm2 += x2[j] * x2[j]
1053+
# return 1.0 - d / sqrt(norm1 * norm2)
10541054

10551055
#------------------------------------------------------------
1056-
# Cosine Distance
1056+
# Arccos Distance
10571057
# D(x, y) = arccos(dot(x, y) / (|x| * |y|)) / PI
10581058

10591059
cdef class ArccosDistance(DistanceMetric):

0 commit comments

Comments
 (0)