1- #!python
2- #cython: boundscheck=False
3- #cython: wraparound=False
4- #cython: cdivision=True
1+ # !python
2+ # cython: boundscheck=False
3+ # cython: wraparound=False
4+ # cython: cdivision=True
55
66# By Jake Vanderplas (2013) <[email protected]>
77# written for the scikit-learn project
@@ -22,7 +22,7 @@ ITYPE = np.intp
2222# Numpy 1.3-1.4 compatibility utilities
2323cdef DTYPE_t[:, ::1 ] get_memview_DTYPE_2D(
2424 np.ndarray[DTYPE_t, ndim= 2 , mode= ' c' ] X):
25- return < DTYPE_t [:X .shape [0 ],:X .shape [1 ]:1 ]> (< DTYPE_t * > X .data )
25+ return < DTYPE_t[:X.shape[0 ], :X.shape[1 ]:1 ]> (< DTYPE_t* > X.data)
2626
2727
2828cdef DTYPE_t* get_vec_ptr(np.ndarray[DTYPE_t, ndim= 1 , mode= ' c' ] vec):
@@ -98,7 +98,8 @@ def get_valid_metric_ids(L):
9898 """ Given an iterable of metric class names or class identifiers,
9999 return a list of metric IDs which map to those classes.
100100
101- Example:
101+ Examples
102+ --------
102103 >>> L = get_valid_metric_ids([EuclideanDistance, 'ManhattanDistance'])
103104 >>> sorted(L)
104105 ['cityblock', 'euclidean', 'l1', 'l2', 'manhattan']
@@ -115,6 +116,10 @@ cdef class DistanceMetric:
115116 This class provides a uniform interface to fast distance metric
116117 functions. The various metrics can be accessed via the `get_metric`
117118 class method and the metric string identifier (see below).
119+
120+ Examples
121+ --------
122+
118123 For example, to use the Euclidean distance:
119124
120125 >>> dist = DistanceMetric.get_metric('euclidean')
@@ -391,20 +396,20 @@ cdef class DistanceMetric:
391396 Xarr = np.asarray(X, dtype = DTYPE, order = ' C' )
392397 if Y is None :
393398 Darr = np.zeros((Xarr.shape[0 ], Xarr.shape[0 ]),
394- dtype = DTYPE , order = 'C' )
399+ dtype = DTYPE, order = ' C' )
395400 self .pdist(get_memview_DTYPE_2D(Xarr),
396401 get_memview_DTYPE_2D(Darr))
397402 else :
398403 Yarr = np.asarray(Y, dtype = DTYPE, order = ' C' )
399404 Darr = np.zeros((Xarr.shape[0 ], Yarr.shape[0 ]),
400- dtype = DTYPE , order = 'C' )
405+ dtype = DTYPE, order = ' C' )
401406 self .cdist(get_memview_DTYPE_2D(Xarr),
402407 get_memview_DTYPE_2D(Yarr),
403408 get_memview_DTYPE_2D(Darr))
404409 return Darr
405410
406411
407- #------------------------------------------------------------
412+ # ------------------------------------------------------------
408413# Euclidean Distance
409414# d = sqrt(sum((x_i - y_i)^2))
410415cdef class EuclideanDistance(DistanceMetric):
@@ -437,7 +442,7 @@ cdef class EuclideanDistance(DistanceMetric):
437442 return dist ** 2
438443
439444
440- #------------------------------------------------------------
445+ # ------------------------------------------------------------
441446# SEuclidean Distance
442447# d = sqrt(sum((x_i - y_i)^2 / v_i))
443448cdef class SEuclideanDistance(DistanceMetric):
@@ -481,7 +486,7 @@ cdef class SEuclideanDistance(DistanceMetric):
481486 return dist ** 2
482487
483488
484- #------------------------------------------------------------
489+ # ------------------------------------------------------------
485490# Manhattan Distance
486491# d = sum(abs(x_i - y_i))
487492cdef class ManhattanDistance(DistanceMetric):
@@ -502,7 +507,7 @@ cdef class ManhattanDistance(DistanceMetric):
502507 return d
503508
504509
505- #------------------------------------------------------------
510+ # ------------------------------------------------------------
506511# Chebyshev Distance
507512# d = max_i(abs(x_i - y_i))
508513cdef class ChebyshevDistance(DistanceMetric):
@@ -523,7 +528,7 @@ cdef class ChebyshevDistance(DistanceMetric):
523528 return d
524529
525530
526- #------------------------------------------------------------
531+ # ------------------------------------------------------------
527532# Minkowski Distance
528533# d = sum(abs(x_i - y_i)^p) ^ (1/p)
529534cdef class MinkowskiDistance(DistanceMetric):
@@ -570,7 +575,7 @@ cdef class MinkowskiDistance(DistanceMetric):
570575 return dist ** self .p
571576
572577
573- #------------------------------------------------------------
578+ # ------------------------------------------------------------
574579# W-Minkowski Distance
575580# d = sum(abs(w_i * (x_i - y_i))^p) ^ (1/p)
576581cdef class WMinkowskiDistance(DistanceMetric):
@@ -629,7 +634,7 @@ cdef class WMinkowskiDistance(DistanceMetric):
629634 return dist ** self .p
630635
631636
632- #------------------------------------------------------------
637+ # ------------------------------------------------------------
633638# Mahalanobis Distance
634639# d = sqrt( (x - y)^T V^-1 (x - y) )
635640cdef class MahalanobisDistance(DistanceMetric):
@@ -699,7 +704,7 @@ cdef class MahalanobisDistance(DistanceMetric):
699704 return dist ** 2
700705
701706
702- #------------------------------------------------------------
707+ # ------------------------------------------------------------
703708# Hamming Distance
704709# d = N_unequal(x, y) / N_tot
705710cdef class HammingDistance(DistanceMetric):
@@ -721,7 +726,7 @@ cdef class HammingDistance(DistanceMetric):
721726 return float (n_unequal) / size
722727
723728
724- #------------------------------------------------------------
729+ # ------------------------------------------------------------
725730# Canberra Distance
726731# D(x, y) = sum[ abs(x_i - y_i) / (abs(x_i) + abs(y_i)) ]
727732cdef class CanberraDistance(DistanceMetric):
@@ -744,7 +749,7 @@ cdef class CanberraDistance(DistanceMetric):
744749 return d
745750
746751
747- #------------------------------------------------------------
752+ # ------------------------------------------------------------
748753# Bray-Curtis Distance
749754# D(x, y) = sum[abs(x_i - y_i)] / sum[abs(x_i) + abs(y_i)]
750755cdef class BrayCurtisDistance(DistanceMetric):
@@ -769,7 +774,7 @@ cdef class BrayCurtisDistance(DistanceMetric):
769774 return 0.0
770775
771776
772- #------------------------------------------------------------
777+ # ------------------------------------------------------------
773778# Jaccard Distance (boolean)
774779# D(x, y) = N_unequal(x, y) / N_nonzero(x, y)
775780cdef class JaccardDistance(DistanceMetric):
@@ -796,7 +801,7 @@ cdef class JaccardDistance(DistanceMetric):
796801 return (nnz - n_eq) * 1.0 / nnz
797802
798803
799- #------------------------------------------------------------
804+ # ------------------------------------------------------------
800805# Matching Distance (boolean)
801806# D(x, y) = n_neq / n
802807cdef class MatchingDistance(DistanceMetric):
@@ -820,7 +825,7 @@ cdef class MatchingDistance(DistanceMetric):
820825 return n_neq * 1. / size
821826
822827
823- #------------------------------------------------------------
828+ # ------------------------------------------------------------
824829# Dice Distance (boolean)
825830# D(x, y) = n_neq / (2 * ntt + n_neq)
826831cdef class DiceDistance(DistanceMetric):
@@ -845,7 +850,7 @@ cdef class DiceDistance(DistanceMetric):
845850 return n_neq / (2.0 * ntt + n_neq)
846851
847852
848- #------------------------------------------------------------
853+ # ------------------------------------------------------------
849854# Kulsinski Distance (boolean)
850855# D(x, y) = (ntf + nft - ntt + n) / (n_neq + n)
851856cdef class KulsinskiDistance(DistanceMetric):
@@ -870,7 +875,7 @@ cdef class KulsinskiDistance(DistanceMetric):
870875 return (n_neq - ntt + size) * 1.0 / (n_neq + size)
871876
872877
873- #------------------------------------------------------------
878+ # ------------------------------------------------------------
874879# Rogers-Tanimoto Distance (boolean)
875880# D(x, y) = 2 * n_neq / (n + n_neq)
876881cdef class RogersTanimotoDistance(DistanceMetric):
@@ -894,7 +899,7 @@ cdef class RogersTanimotoDistance(DistanceMetric):
894899 return (2.0 * n_neq) / (size + n_neq)
895900
896901
897- #------------------------------------------------------------
902+ # ------------------------------------------------------------
898903# Russell-Rao Distance (boolean)
899904# D(x, y) = (n - ntt) / n
900905cdef class RussellRaoDistance(DistanceMetric):
@@ -918,7 +923,7 @@ cdef class RussellRaoDistance(DistanceMetric):
918923 return (size - ntt) * 1. / size
919924
920925
921- #------------------------------------------------------------
926+ # ------------------------------------------------------------
922927# Sokal-Michener Distance (boolean)
923928# D(x, y) = 2 * n_neq / (n + n_neq)
924929cdef class SokalMichenerDistance(DistanceMetric):
@@ -942,7 +947,7 @@ cdef class SokalMichenerDistance(DistanceMetric):
942947 return (2.0 * n_neq) / (size + n_neq)
943948
944949
945- #------------------------------------------------------------
950+ # ------------------------------------------------------------
946951# Sokal-Sneath Distance (boolean)
947952# D(x, y) = n_neq / (0.5 * ntt + n_neq)
948953cdef class SokalSneathDistance(DistanceMetric):
@@ -967,7 +972,7 @@ cdef class SokalSneathDistance(DistanceMetric):
967972 return n_neq / (0.5 * ntt + n_neq)
968973
969974
970- #------------------------------------------------------------
975+ # ------------------------------------------------------------
971976# Haversine Distance (2 dimensional)
972977# D(x, y) = 2 arcsin{sqrt[sin^2 ((x1 - y1) / 2)
973978# + cos(x1) cos(y1) sin^2 ((x2 - y2) / 2)]}
@@ -994,14 +999,15 @@ cdef class HaversineDistance(DistanceMetric):
994999 return (sin_0 * sin_0 + cos(x1[0 ]) * cos(x2[0 ]) * sin_1 * sin_1)
9951000
9961001 cdef inline DTYPE_t dist(self , DTYPE_t* x1, DTYPE_t* x2,
997- ITYPE_t size ) nogil except - 1 :
1002+ ITYPE_t size) nogil except - 1 :
9981003 if size != 2 :
9991004 with gil:
1000- raise ValueError ("Haversine distance only valid in 2 dimensions" )
1005+ raise ValueError (" Haversine distance only valid in"
1006+ " 2 dimensions" )
10011007 cdef DTYPE_t sin_0 = sin(0.5 * (x1[0 ] - x2[0 ]))
10021008 cdef DTYPE_t sin_1 = sin(0.5 * (x1[1 ] - x2[1 ]))
1003- return 2 * asin (sqrt (sin_0 * sin_0
1004- + cos (x1 [0 ]) * cos (x2 [0 ]) * sin_1 * sin_1 ))
1009+ return 2 * asin(sqrt(sin_0 * sin_0 +
1010+ cos(x1[0 ]) * cos(x2[0 ]) * sin_1 * sin_1))
10051011
10061012 cdef inline DTYPE_t _rdist_to_dist(self , DTYPE_t rdist) except - 1 :
10071013 return 2 * asin(sqrt(rdist))
@@ -1018,12 +1024,12 @@ cdef class HaversineDistance(DistanceMetric):
10181024 return tmp * tmp
10191025
10201026
1021- #------------------------------------------------------------
1027+ # ------------------------------------------------------------
10221028# Yule Distance (boolean)
10231029# D(x, y) = 2 * ntf * nft / (ntt * nff + ntf * nft)
10241030# [This is not a true metric, so we will leave it out.]
10251031#
1026- #cdef class YuleDistance(DistanceMetric):
1032+ # cdef class YuleDistance(DistanceMetric):
10271033# cdef inline DTYPE_t dist(self, DTYPE_t* x1, DTYPE_t* x2, ITYPE_t size):
10281034# cdef int tf1, tf2, ntf = 0, nft = 0, ntt = 0, nff = 0
10291035# cdef np.intp_t j
@@ -1037,13 +1043,15 @@ cdef class HaversineDistance(DistanceMetric):
10371043# return (2.0 * ntf * nft) / (ntt * nff + ntf * nft)
10381044
10391045
1040- #------------------------------------------------------------
1046+ # ------------------------------------------------------------
10411047# Cosine Distance
10421048# D(x, y) = 1 - dot(x, y) / (|x| * |y|)
1043- # [This is not a true metric, so we will leave it out. Use the `arccos` distance instead]
1049+ # [This is not a true metric, so we will leave it out. Use the `arccos`
1050+ # distance instead]
10441051
1045- #cdef class CosineDistance(DistanceMetric):
1046- # cdef inline DTYPE_t dist(self, DTYPE_t* x1, DTYPE_t* x2, ITYPE_t size) nogil except -1:
1052+ # cdef class CosineDistance(DistanceMetric):
1053+ # cdef inline DTYPE_t dist(self, DTYPE_t* x1, DTYPE_t* x2,
1054+ # ITYPE_t size) nogil except -1:
10471055# cdef DTYPE_t d = 0, norm1 = 0, norm2 = 0
10481056# cdef np.intp_t j
10491057# for j in range(size):
@@ -1052,12 +1060,13 @@ cdef class HaversineDistance(DistanceMetric):
10521060# norm2 += x2[j] * x2[j]
10531061# return 1.0 - d / sqrt(norm1 * norm2)
10541062
1055- #------------------------------------------------------------
1063+ # ------------------------------------------------------------
10561064# Arccos Distance
10571065# D(x, y) = arccos(dot(x, y) / (|x| * |y|)) / PI
10581066
10591067cdef class ArccosDistance(DistanceMetric):
1060- cdef inline DTYPE_t dist (self , DTYPE_t * x1 , DTYPE_t * x2 , ITYPE_t size ) nogil except - 1 :
1068+ cdef inline DTYPE_t dist(self , DTYPE_t* x1, DTYPE_t* x2,
1069+ ITYPE_t size) nogil except - 1 :
10611070 cdef DTYPE_t d = 0 , norm1 = 0 , norm2 = 0
10621071 cdef np.intp_t j
10631072 for j in range (size):
@@ -1067,13 +1076,12 @@ cdef class ArccosDistance(DistanceMetric):
10671076 return acos(1.0 - d / sqrt(norm1 * norm2)) / M_PI
10681077
10691078
1070-
1071- #------------------------------------------------------------
1079+ # ------------------------------------------------------------
10721080# Correlation Distance
10731081# D(x, y) = dot((x - mx), (y - my)) / (|x - mx| * |y - my|)
10741082# [This is not a true metric, so we will leave it out.]
10751083#
1076- #cdef class CorrelationDistance(DistanceMetric):
1084+ # cdef class CorrelationDistance(DistanceMetric):
10771085# cdef inline DTYPE_t dist(self, DTYPE_t* x1, DTYPE_t* x2, ITYPE_t size):
10781086# cdef DTYPE_t mu1 = 0, mu2 = 0, x1nrm = 0, x2nrm = 0, x1Tx2 = 0
10791087# cdef DTYPE_t tmp1, tmp2
@@ -1095,7 +1103,7 @@ cdef class ArccosDistance(DistanceMetric):
10951103# return (1. - x1Tx2) / sqrt(x1nrm * x2nrm)
10961104
10971105
1098- #------------------------------------------------------------
1106+ # ------------------------------------------------------------
10991107# User-defined distance
11001108#
11011109cdef class PyFuncDistance(DistanceMetric):
@@ -1134,4 +1142,4 @@ cdef class PyFuncDistance(DistanceMetric):
11341142
11351143
11361144cdef inline double fmax(double a, double b) nogil:
1137- return max (a , b )
1145+ return max (a, b)
0 commit comments