1+ # cython: infer_types=True
2+ # Fast swap step in PAM algorithm for k_medoid.
3+ # Author: Timothée Mathieu
4+ # License: 3-clause BSD
5+
6+ cimport cython
7+ import numpy as np
8+ cimport numpy as np
9+
10+ from sklearn.utils.extmath import row_norms
11+ from cython cimport floating
12+
13+ import sys
14+ from time import time
15+
16+ from libc.math cimport exp, log, sqrt, pow , fabs
17+ cimport numpy as np
18+ from numpy.math cimport INFINITY
19+
20+
21+ # Modified from sklearn.cluster._k_means_fast.pyx
22+ np.import_array()
23+
cdef floating _euclidean_dense_dense(
        floating* a,  # IN
        floating* b,  # IN
        int n_features) nogil:
    """Return the squared Euclidean distance between dense vectors a and b.

    No square root is taken: the value returned is the sum over the first
    `n_features` entries of ``(a[k] - b[k]) ** 2``.

    Parameters
    ----------
    a, b : floating*
        Pointers to the first element of each vector; at least `n_features`
        contiguous elements are read from each.
    n_features : int
        Number of components to compare.

    Returns
    -------
    floating
        Sum of squared component-wise differences.
    """
    cdef:
        int k
        int n_blocks = n_features // 4
        int n_tail = n_features % 4
        floating d0, d1, d2, d3
        floating acc = 0

    # Process four components per iteration (manually unrolled loop), then
    # slide both pointers forward to the next group of four.
    for k in range(n_blocks):
        d0 = a[0] - b[0]
        d1 = a[1] - b[1]
        d2 = a[2] - b[2]
        d3 = a[3] - b[3]
        acc += d0 * d0 + d1 * d1 + d2 * d2 + d3 * d3
        a += 4
        b += 4

    # Remaining (fewer than four) components; a and b now point at the tail.
    for k in range(n_tail):
        d0 = a[k] - b[k]
        acc += d0 * d0

    return acc
47+
48+
49+
cpdef np.ndarray[floating] _kmeans_loss(np.ndarray[floating, ndim=2, mode='c'] X,
                                        int[:] labels):
    """Compute the per-sample k-means loss.

    The center of each cluster is the mean of the samples that carry its
    label; the loss of a sample is the squared Euclidean distance between
    that sample and the center of its own cluster.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features), C-contiguous
        Input samples (float32 or float64).
    labels : int memoryview of shape (n_samples,)
        Cluster label of each sample. Labels are only used to group the
        samples, so they do not have to be the contiguous range
        ``0..n_clusters-1``.

    Returns
    -------
    inertias : ndarray of shape (n_samples,)
        Squared distance between each sample and its assigned center, with
        the same floating dtype as `X`.

    Raises
    ------
    ValueError
        If `labels` does not contain exactly one entry per row of `X`.
    """
    if floating is float:
        dtype = np.float32
    elif floating is double:
        dtype = np.double

    cdef:
        int n_samples = X.shape[0]
        int n_features = X.shape[1]
        int i, j
        int n_classes
        Py_ssize_t k
        np.ndarray[np.npy_intp] compact
        np.ndarray[floating, ndim=2] centers
        np.ndarray[np.npy_intp] num_in_cluster
        np.ndarray[floating] inertias = np.zeros(n_samples, dtype=dtype)

    if labels.shape[0] != n_samples:
        raise ValueError(
            "labels must have one entry per sample: got %d labels for %d "
            "samples" % (labels.shape[0], n_samples))

    # Remap the raw label values onto 0..n_classes-1. Indexing the center
    # table with the raw values would run out of bounds (or wrap around for
    # negative values) whenever the labels are not exactly 0..n_classes-1.
    # For labels that already are 0..n_classes-1 the mapping is the identity.
    unique_result = np.unique(np.asarray(labels), return_inverse=True)
    n_classes = unique_result[0].shape[0]
    compact = np.asarray(unique_result[1], dtype=np.intp).ravel()

    centers = np.zeros([n_classes, n_features], dtype=dtype)
    # np.intp matches npy_intp on every platform, unlike C long vs. the
    # default NumPy integer.
    num_in_cluster = np.zeros(n_classes, dtype=np.intp)

    # Accumulate per-cluster feature sums and member counts.
    for i in range(n_samples):
        k = compact[i]
        for j in range(n_features):
            centers[k, j] += X[i, j]
        num_in_cluster[k] += 1

    # Turn sums into means. Every remapped cluster has at least one member,
    # so the count is never zero here.
    for k in range(n_classes):
        for j in range(n_features):
            centers[k, j] /= num_in_cluster[k]

    # Squared distance of each sample to the center of its own cluster.
    for i in range(n_samples):
        k = compact[i]
        inertias[i] = _euclidean_dense_dense(&X[i, 0], &centers[k, 0],
                                             n_features)
    return inertias
84+
85+
86+
87+
88+
89+ # Regression and Classification losses, from scikit-learn.
90+
91+
92+
93+
94+ # ----------------------------------------
95+ # Extension Types for Loss Functions
96+ # ----------------------------------------
97+
cdef class LossFunction:
    """Base class for convex loss functions.

    The base implementations of `loss` and `dloss` both return zero; the
    `py_loss` and `py_dloss` wrappers expose them to Python code.
    """

    cdef double loss(self, double p, double y) nogil:
        """Evaluate the loss function.

        Parameters
        ----------
        p : double
            The prediction, p = w^T x
        y : double
            The true value (aka target)

        Returns
        -------
        double
            The loss evaluated at `p` and `y`; always 0 in this base class.
        """
        return 0.0

    cdef double dloss(self, double p, double y) nogil:
        """Evaluate the derivative of the loss with respect to `p`.

        Parameters
        ----------
        p : double
            The prediction, p = w^T x
        y : double
            The true value (aka target)

        Returns
        -------
        double
            The derivative of the loss function with regards to `p`;
            always 0 in this base class.
        """
        return 0.0

    def py_loss(self, double p, double y):
        """Python-callable wrapper around `loss`, for testing.

        Pytest needs a python function and can't use cdef functions.
        """
        return self.loss(p, y)

    def py_dloss(self, double p, double y):
        """Python-callable wrapper around `dloss`, for testing.

        Pytest needs a python function and can't use cdef functions.
        """
        return self.dloss(p, y)
149+
150+
151+ cdef class Regression(LossFunction):
152+ """ Base class for loss functions for regression"""
1153
2154 cdef double loss(self , double p, double y) nogil:
3155 return 0.
@@ -180,4 +332,4 @@ cdef class Huber(Regression):
180332 return - self .c
181333
182334 def __reduce__ (self ):
183- return Huber, (self .c,)
335+ return Huber, (self .c,)
0 commit comments