Skip to content

Commit b866bfa

Browse files
author
Guillaume Lemaitre
committed
PEP 8 for dist_metrics.pyx
1 parent ff2a671 commit b866bfa

File tree

1 file changed

+52
-44
lines changed

1 file changed

+52
-44
lines changed

hdbscan/dist_metrics.pyx

Lines changed: 52 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
#!python
2-
#cython: boundscheck=False
3-
#cython: wraparound=False
4-
#cython: cdivision=True
1+
# !python
2+
# cython: boundscheck=False
3+
# cython: wraparound=False
4+
# cython: cdivision=True
55

66
# By Jake Vanderplas (2013) <[email protected]>
77
# written for the scikit-learn project
@@ -22,7 +22,7 @@ ITYPE = np.intp
2222
# Numpy 1.3-1.4 compatibility utilities
2323
cdef DTYPE_t[:, ::1] get_memview_DTYPE_2D(
2424
np.ndarray[DTYPE_t, ndim=2, mode='c'] X):
25-
return <DTYPE_t[:X.shape[0],:X.shape[1]:1]> (<DTYPE_t*> X.data)
25+
return <DTYPE_t[:X.shape[0], :X.shape[1]:1]> (<DTYPE_t*> X.data)
2626

2727

2828
cdef DTYPE_t* get_vec_ptr(np.ndarray[DTYPE_t, ndim=1, mode='c'] vec):
@@ -98,7 +98,8 @@ def get_valid_metric_ids(L):
9898
"""Given an iterable of metric class names or class identifiers,
9999
return a list of metric IDs which map to those classes.
100100
101-
Example:
101+
Examples
102+
--------
102103
>>> L = get_valid_metric_ids([EuclideanDistance, 'ManhattanDistance'])
103104
>>> sorted(L)
104105
['cityblock', 'euclidean', 'l1', 'l2', 'manhattan']
@@ -115,6 +116,10 @@ cdef class DistanceMetric:
115116
This class provides a uniform interface to fast distance metric
116117
functions. The various metrics can be accessed via the `get_metric`
117118
class method and the metric string identifier (see below).
119+
120+
Examples
121+
--------
122+
118123
For example, to use the Euclidean distance:
119124
120125
>>> dist = DistanceMetric.get_metric('euclidean')
@@ -391,20 +396,20 @@ cdef class DistanceMetric:
391396
Xarr = np.asarray(X, dtype=DTYPE, order='C')
392397
if Y is None:
393398
Darr = np.zeros((Xarr.shape[0], Xarr.shape[0]),
394-
dtype=DTYPE, order='C')
399+
dtype=DTYPE, order='C')
395400
self.pdist(get_memview_DTYPE_2D(Xarr),
396401
get_memview_DTYPE_2D(Darr))
397402
else:
398403
Yarr = np.asarray(Y, dtype=DTYPE, order='C')
399404
Darr = np.zeros((Xarr.shape[0], Yarr.shape[0]),
400-
dtype=DTYPE, order='C')
405+
dtype=DTYPE, order='C')
401406
self.cdist(get_memview_DTYPE_2D(Xarr),
402407
get_memview_DTYPE_2D(Yarr),
403408
get_memview_DTYPE_2D(Darr))
404409
return Darr
405410

406411

407-
#------------------------------------------------------------
412+
# ------------------------------------------------------------
408413
# Euclidean Distance
409414
# d = sqrt(sum((x_i - y_i)^2))
410415
cdef class EuclideanDistance(DistanceMetric):
@@ -437,7 +442,7 @@ cdef class EuclideanDistance(DistanceMetric):
437442
return dist ** 2
438443

439444

440-
#------------------------------------------------------------
445+
# ------------------------------------------------------------
441446
# SEuclidean Distance
442447
# d = sqrt(sum((x_i - y_i)^2 / v_i))
443448
cdef class SEuclideanDistance(DistanceMetric):
@@ -481,7 +486,7 @@ cdef class SEuclideanDistance(DistanceMetric):
481486
return dist ** 2
482487

483488

484-
#------------------------------------------------------------
489+
# ------------------------------------------------------------
485490
# Manhattan Distance
486491
# d = sum(abs(x_i - y_i))
487492
cdef class ManhattanDistance(DistanceMetric):
@@ -502,7 +507,7 @@ cdef class ManhattanDistance(DistanceMetric):
502507
return d
503508

504509

505-
#------------------------------------------------------------
510+
# ------------------------------------------------------------
506511
# Chebyshev Distance
507512
# d = max_i(abs(x_i - y_i))
508513
cdef class ChebyshevDistance(DistanceMetric):
@@ -523,7 +528,7 @@ cdef class ChebyshevDistance(DistanceMetric):
523528
return d
524529

525530

526-
#------------------------------------------------------------
531+
# ------------------------------------------------------------
527532
# Minkowski Distance
528533
# d = sum(abs(x_i - y_i)^p) ^ (1/p)
529534
cdef class MinkowskiDistance(DistanceMetric):
@@ -570,7 +575,7 @@ cdef class MinkowskiDistance(DistanceMetric):
570575
return dist ** self.p
571576

572577

573-
#------------------------------------------------------------
578+
# ------------------------------------------------------------
574579
# W-Minkowski Distance
575580
# d = sum(abs(w_i * (x_i - y_i))^p) ^ (1/p)
576581
cdef class WMinkowskiDistance(DistanceMetric):
@@ -629,7 +634,7 @@ cdef class WMinkowskiDistance(DistanceMetric):
629634
return dist ** self.p
630635

631636

632-
#------------------------------------------------------------
637+
# ------------------------------------------------------------
633638
# Mahalanobis Distance
634639
# d = sqrt( (x - y)^T V^-1 (x - y) )
635640
cdef class MahalanobisDistance(DistanceMetric):
@@ -699,7 +704,7 @@ cdef class MahalanobisDistance(DistanceMetric):
699704
return dist ** 2
700705

701706

702-
#------------------------------------------------------------
707+
# ------------------------------------------------------------
703708
# Hamming Distance
704709
# d = N_unequal(x, y) / N_tot
705710
cdef class HammingDistance(DistanceMetric):
@@ -721,7 +726,7 @@ cdef class HammingDistance(DistanceMetric):
721726
return float(n_unequal) / size
722727

723728

724-
#------------------------------------------------------------
729+
# ------------------------------------------------------------
725730
# Canberra Distance
726731
# D(x, y) = sum[ abs(x_i - y_i) / (abs(x_i) + abs(y_i)) ]
727732
cdef class CanberraDistance(DistanceMetric):
@@ -744,7 +749,7 @@ cdef class CanberraDistance(DistanceMetric):
744749
return d
745750

746751

747-
#------------------------------------------------------------
752+
# ------------------------------------------------------------
748753
# Bray-Curtis Distance
749754
# D(x, y) = sum[abs(x_i - y_i)] / sum[abs(x_i) + abs(y_i)]
750755
cdef class BrayCurtisDistance(DistanceMetric):
@@ -769,7 +774,7 @@ cdef class BrayCurtisDistance(DistanceMetric):
769774
return 0.0
770775

771776

772-
#------------------------------------------------------------
777+
# ------------------------------------------------------------
773778
# Jaccard Distance (boolean)
774779
# D(x, y) = N_unequal(x, y) / N_nonzero(x, y)
775780
cdef class JaccardDistance(DistanceMetric):
@@ -796,7 +801,7 @@ cdef class JaccardDistance(DistanceMetric):
796801
return (nnz - n_eq) * 1.0 / nnz
797802

798803

799-
#------------------------------------------------------------
804+
# ------------------------------------------------------------
800805
# Matching Distance (boolean)
801806
# D(x, y) = n_neq / n
802807
cdef class MatchingDistance(DistanceMetric):
@@ -820,7 +825,7 @@ cdef class MatchingDistance(DistanceMetric):
820825
return n_neq * 1. / size
821826

822827

823-
#------------------------------------------------------------
828+
# ------------------------------------------------------------
824829
# Dice Distance (boolean)
825830
# D(x, y) = n_neq / (2 * ntt + n_neq)
826831
cdef class DiceDistance(DistanceMetric):
@@ -845,7 +850,7 @@ cdef class DiceDistance(DistanceMetric):
845850
return n_neq / (2.0 * ntt + n_neq)
846851

847852

848-
#------------------------------------------------------------
853+
# ------------------------------------------------------------
849854
# Kulsinski Distance (boolean)
850855
# D(x, y) = (ntf + nft - ntt + n) / (n_neq + n)
851856
cdef class KulsinskiDistance(DistanceMetric):
@@ -870,7 +875,7 @@ cdef class KulsinskiDistance(DistanceMetric):
870875
return (n_neq - ntt + size) * 1.0 / (n_neq + size)
871876

872877

873-
#------------------------------------------------------------
878+
# ------------------------------------------------------------
874879
# Rogers-Tanimoto Distance (boolean)
875880
# D(x, y) = 2 * n_neq / (n + n_neq)
876881
cdef class RogersTanimotoDistance(DistanceMetric):
@@ -894,7 +899,7 @@ cdef class RogersTanimotoDistance(DistanceMetric):
894899
return (2.0 * n_neq) / (size + n_neq)
895900

896901

897-
#------------------------------------------------------------
902+
# ------------------------------------------------------------
898903
# Russell-Rao Distance (boolean)
899904
# D(x, y) = (n - ntt) / n
900905
cdef class RussellRaoDistance(DistanceMetric):
@@ -918,7 +923,7 @@ cdef class RussellRaoDistance(DistanceMetric):
918923
return (size - ntt) * 1. / size
919924

920925

921-
#------------------------------------------------------------
926+
# ------------------------------------------------------------
922927
# Sokal-Michener Distance (boolean)
923928
# D(x, y) = 2 * n_neq / (n + n_neq)
924929
cdef class SokalMichenerDistance(DistanceMetric):
@@ -942,7 +947,7 @@ cdef class SokalMichenerDistance(DistanceMetric):
942947
return (2.0 * n_neq) / (size + n_neq)
943948

944949

945-
#------------------------------------------------------------
950+
# ------------------------------------------------------------
946951
# Sokal-Sneath Distance (boolean)
947952
# D(x, y) = n_neq / (0.5 * ntt + n_neq)
948953
cdef class SokalSneathDistance(DistanceMetric):
@@ -967,7 +972,7 @@ cdef class SokalSneathDistance(DistanceMetric):
967972
return n_neq / (0.5 * ntt + n_neq)
968973

969974

970-
#------------------------------------------------------------
975+
# ------------------------------------------------------------
971976
# Haversine Distance (2 dimensional)
972977
# D(x, y) = 2 arcsin{sqrt[sin^2 ((x1 - y1) / 2)
973978
# + cos(x1) cos(y1) sin^2 ((x2 - y2) / 2)]}
@@ -994,14 +999,15 @@ cdef class HaversineDistance(DistanceMetric):
994999
return (sin_0 * sin_0 + cos(x1[0]) * cos(x2[0]) * sin_1 * sin_1)
9951000

9961001
cdef inline DTYPE_t dist(self, DTYPE_t* x1, DTYPE_t* x2,
997-
ITYPE_t size) nogil except -1:
1002+
ITYPE_t size) nogil except -1:
9981003
if size != 2:
9991004
with gil:
1000-
raise ValueError("Haversine distance only valid in 2 dimensions")
1005+
raise ValueError("Haversine distance only valid in"
1006+
" 2 dimensions")
10011007
cdef DTYPE_t sin_0 = sin(0.5 * (x1[0] - x2[0]))
10021008
cdef DTYPE_t sin_1 = sin(0.5 * (x1[1] - x2[1]))
1003-
return 2 * asin(sqrt(sin_0 * sin_0
1004-
+ cos(x1[0]) * cos(x2[0]) * sin_1 * sin_1))
1009+
return 2 * asin(sqrt(sin_0 * sin_0 +
1010+
cos(x1[0]) * cos(x2[0]) * sin_1 * sin_1))
10051011

10061012
cdef inline DTYPE_t _rdist_to_dist(self, DTYPE_t rdist) except -1:
10071013
return 2 * asin(sqrt(rdist))
@@ -1018,12 +1024,12 @@ cdef class HaversineDistance(DistanceMetric):
10181024
return tmp * tmp
10191025

10201026

1021-
#------------------------------------------------------------
1027+
# ------------------------------------------------------------
10221028
# Yule Distance (boolean)
10231029
# D(x, y) = 2 * ntf * nft / (ntt * nff + ntf * nft)
10241030
# [This is not a true metric, so we will leave it out.]
10251031
#
1026-
#cdef class YuleDistance(DistanceMetric):
1032+
# cdef class YuleDistance(DistanceMetric):
10271033
# cdef inline DTYPE_t dist(self, DTYPE_t* x1, DTYPE_t* x2, ITYPE_t size):
10281034
# cdef int tf1, tf2, ntf = 0, nft = 0, ntt = 0, nff = 0
10291035
# cdef np.intp_t j
@@ -1037,13 +1043,15 @@ cdef class HaversineDistance(DistanceMetric):
10371043
# return (2.0 * ntf * nft) / (ntt * nff + ntf * nft)
10381044

10391045

1040-
#------------------------------------------------------------
1046+
# ------------------------------------------------------------
10411047
# Cosine Distance
10421048
# D(x, y) = 1 - dot(x, y) / (|x| * |y|)
1043-
# [This is not a true metric, so we will leave it out. Use the `arccos` distance instead]
1049+
# [This is not a true metric, so we will leave it out. Use the `arccos`
1050+
# distance instead]
10441051

1045-
#cdef class CosineDistance(DistanceMetric):
1046-
# cdef inline DTYPE_t dist(self, DTYPE_t* x1, DTYPE_t* x2, ITYPE_t size) nogil except -1:
1052+
# cdef class CosineDistance(DistanceMetric):
1053+
# cdef inline DTYPE_t dist(self, DTYPE_t* x1, DTYPE_t* x2,
1054+
# ITYPE_t size) nogil except -1:
10471055
# cdef DTYPE_t d = 0, norm1 = 0, norm2 = 0
10481056
# cdef np.intp_t j
10491057
# for j in range(size):
@@ -1052,12 +1060,13 @@ cdef class HaversineDistance(DistanceMetric):
10521060
# norm2 += x2[j] * x2[j]
10531061
# return 1.0 - d / sqrt(norm1 * norm2)
10541062

1055-
#------------------------------------------------------------
1063+
# ------------------------------------------------------------
10561064
# Arccos Distance
10571065
# D(x, y) = arccos(dot(x, y) / (|x| * |y|)) / PI
10581066

10591067
cdef class ArccosDistance(DistanceMetric):
1060-
cdef inline DTYPE_t dist(self, DTYPE_t* x1, DTYPE_t* x2, ITYPE_t size) nogil except -1:
1068+
cdef inline DTYPE_t dist(self, DTYPE_t* x1, DTYPE_t* x2,
1069+
ITYPE_t size) nogil except -1:
10611070
cdef DTYPE_t d = 0, norm1 = 0, norm2 = 0
10621071
cdef np.intp_t j
10631072
for j in range(size):
@@ -1067,13 +1076,12 @@ cdef class ArccosDistance(DistanceMetric):
10671076
return acos(1.0 - d / sqrt(norm1 * norm2)) / M_PI
10681077

10691078

1070-
1071-
#------------------------------------------------------------
1079+
# ------------------------------------------------------------
10721080
# Correlation Distance
10731081
# D(x, y) = dot((x - mx), (y - my)) / (|x - mx| * |y - my|)
10741082
# [This is not a true metric, so we will leave it out.]
10751083
#
1076-
#cdef class CorrelationDistance(DistanceMetric):
1084+
# cdef class CorrelationDistance(DistanceMetric):
10771085
# cdef inline DTYPE_t dist(self, DTYPE_t* x1, DTYPE_t* x2, ITYPE_t size):
10781086
# cdef DTYPE_t mu1 = 0, mu2 = 0, x1nrm = 0, x2nrm = 0, x1Tx2 = 0
10791087
# cdef DTYPE_t tmp1, tmp2
@@ -1095,7 +1103,7 @@ cdef class ArccosDistance(DistanceMetric):
10951103
# return (1. - x1Tx2) / sqrt(x1nrm * x2nrm)
10961104

10971105

1098-
#------------------------------------------------------------
1106+
# ------------------------------------------------------------
10991107
# User-defined distance
11001108
#
11011109
cdef class PyFuncDistance(DistanceMetric):
@@ -1134,4 +1142,4 @@ cdef class PyFuncDistance(DistanceMetric):
11341142

11351143

11361144
cdef inline double fmax(double a, double b) nogil:
1137-
return max(a, b)
1145+
return max(a, b)

0 commit comments

Comments
 (0)