1- # cython: infer_types=True
2- # Fast swap step in PAM algorithm for k_medoid.
3- # Author: Timothée Mathieu
4- # License: 3-clause BSD
5-
6- cimport cython
7- import numpy as np
8- cimport numpy as np
9-
10- from sklearn.utils.extmath import row_norms
11- from cython cimport floating
12-
13- from libc.stdint cimport int32_t, int64_t
# Fixed-width libc integer types are used instead of platform-dependent int and long.
15-
16-
17- import sys
18- from time import time
19-
20- from libc.math cimport exp, log, sqrt, pow , fabs
21- cimport numpy as np
22- from numpy.math cimport INFINITY
23-
24-
25- # Modified from sklearn.cluster._k_means_fast.pyx
26- np.import_array()
27-
cdef floating _euclidean_dense_dense(
        floating* a,  # IN
        floating* b,  # IN
        int32_t n_features) nogil:
    """Squared Euclidean distance between two dense vectors ``a`` and ``b``.

    NOTE: despite the name this returns the *squared* distance — no
    square root is taken.
    """
    cdef:
        int32_t k
        int32_t n_blocks = n_features // 4
        int32_t n_tail = n_features % 4
        floating acc = 0

    # Manually unrolled four features per iteration for better cache
    # behaviour; the four squared differences are grouped into a single
    # addition so the floating-point summation order is fixed.
    for k in range(n_blocks):
        acc += ((a[0] - b[0]) * (a[0] - b[0])
                + (a[1] - b[1]) * (a[1] - b[1])
                + (a[2] - b[2]) * (a[2] - b[2])
                + (a[3] - b[3]) * (a[3] - b[3]))
        a += 4
        b += 4

    # Remaining 0-3 features that did not fill a block of four.
    for k in range(n_tail):
        acc += (a[k] - b[k]) * (a[k] - b[k])

    return acc
51-
52-
53-
cpdef np.ndarray[floating] _kmeans_loss(np.ndarray[floating, ndim=2, mode='c'] X,
                                        int32_t[:] labels):
    """Compute the per-sample inertia.

    For each sample, the squared Euclidean distance between that sample
    and the empirical mean (center) of its assigned cluster.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features), C-contiguous
        Input data.
    labels : int32 array of shape (n_samples,)
        Cluster assignment of each sample.  Labels are assumed to be
        0 .. n_classes-1 with every class represented (a class with no
        member would divide by zero below).

    Returns
    -------
    inertias : ndarray of shape (n_samples,)
        inertias[i] = ||X[i] - mean(cluster of i)||^2.
    """
    if floating is float:
        dtype = np.float32
    elif floating is double:
        dtype = np.double

    cdef:
        int32_t n_samples = X.shape[0]
        int32_t n_features = X.shape[1]
        int32_t i, j
        int32_t n_classes = len(np.unique(labels))
        np.ndarray[floating, ndim=2] centers = np.zeros([n_classes,
                                                         n_features],
                                                        dtype=dtype)
        # BUG FIX: the buffer element type must be the C type np.int32_t,
        # not the Python dtype object np.int32.
        np.ndarray[np.int32_t] num_in_cluster = np.zeros(n_classes,
                                                         dtype=np.int32)
        np.ndarray[floating] inertias = np.zeros(n_samples, dtype=dtype)

    # Accumulate per-cluster feature sums; each sample is counted exactly
    # once (the count update is per-sample, outside the feature loop).
    for i in range(n_samples):
        for j in range(n_features):
            centers[labels[i], j] += X[i, j]
        num_in_cluster[labels[i]] += 1

    # Turn the per-cluster sums into means.
    for i in range(n_classes):
        for j in range(n_features):
            centers[i, j] /= num_in_cluster[i]

    # Squared distance from each sample to its own cluster mean.
    for i in range(n_samples):
        j = labels[i]
        inertias[i] = _euclidean_dense_dense(&X[i, 0], &centers[j, 0], n_features)
    return inertias
88-
89-
90-
91-
92-
93- # Regression and Classification losses, from scikit-learn.
94-
95-
96-
97-
98- # ----------------------------------------
99- # Extension Types for Loss Functions
100- # ----------------------------------------
101-
cdef class LossFunction:
    """Base class for convex loss functions.

    Concrete losses override ``loss`` and ``dloss``; the base
    implementations simply return 0.  The ``py_*`` wrappers expose the
    cdef methods to Python so they can be exercised from tests.
    """

    cdef double loss(self , double p, double y) nogil:
        """ Evaluate the loss function.

        Parameters
        ----------
        p : double
            The prediction, p = w^T x
        y : double
            The true value (aka target)

        Returns
        -------
        double
            The loss evaluated at `p` and `y`.
        """
        return 0.

    def py_dloss (self , double p , double y ):
        """ Python version of `dloss` for testing.

        Pytest needs a python function and can't use cdef functions.
        """
        return self .dloss(p, y)

    def py_loss (self , double p , double y ):
        """ Python version of `loss` for testing.

        Pytest needs a python function and can't use cdef functions.
        """
        return self .loss(p, y)


    cdef double dloss(self , double p, double y) nogil:
        """ Evaluate the derivative of the loss function with respect to
        the prediction `p`.

        Parameters
        ----------
        p : double
            The prediction, p = w^T x
        y : double
            The true value (aka target)

        Returns
        -------
        double
            The derivative of the loss function with regards to `p`.
        """
        return 0.
154-
155- cdef class Regression(LossFunction):
156- """ Base class for loss functions for regression"""
1571
1582 cdef double loss(self , double p, double y) nogil:
1593 return 0.
@@ -336,4 +180,4 @@ cdef class Huber(Regression):
336180 return - self .c
337181
338182 def __reduce__ (self ):
339- return Huber, (self .c,)
183+ return Huber, (self .c,)
0 commit comments