+ # cython: infer_types=True
+ # Fast swap step in PAM algorithm for k_medoid.
+ # Author: Timothée Mathieu
+ # License: 3-clause BSD
+
+ cimport cython
+ import numpy as np
+ cimport numpy as np
+
+ from sklearn.utils.extmath import row_norms
+ from cython cimport floating
+
+ import sys
+ from time import time
+
+ from libc.math cimport exp, log, sqrt, pow, fabs
+ from numpy.math cimport INFINITY
+
+
+ # Modified from sklearn.cluster._k_means_fast.pyx
+ np.import_array()
+
+ cdef floating _euclidean_dense_dense(
+         floating* a,  # IN
+         floating* b,  # IN
+         int n_features) nogil:
+     """Squared Euclidean distance between dense vectors a and b."""
+     cdef:
+         int i
+         int n = n_features // 4
+         int rem = n_features % 4
+         floating result = 0
+
+     # We manually unroll the loop for better cache optimization.
+     for i in range(n):
+         result += ((a[0] - b[0]) * (a[0] - b[0])
+                    + (a[1] - b[1]) * (a[1] - b[1])
+                    + (a[2] - b[2]) * (a[2] - b[2])
+                    + (a[3] - b[3]) * (a[3] - b[3]))
+         a += 4; b += 4
+
+     for i in range(rem):
+         result += (a[i] - b[i]) * (a[i] - b[i])
+
+     return result
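The unrolled kernel above accumulates the *squared* distance in blocks of four features (no final square root). For intuition, a plain NumPy reference of the same quantity (a sketch with a hypothetical helper name, not part of the module):

```python
import numpy as np

def euclidean_dense_dense_ref(a, b):
    # Same quantity as the unrolled Cython kernel: the sum of
    # squared coordinate differences, with no final square root.
    a = np.asarray(a, dtype=np.float64)
    b = np.asarray(b, dtype=np.float64)
    return ((a - b) ** 2).sum()
```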
+
+
+ cpdef np.ndarray[floating] _kmeans_loss(np.ndarray[floating, ndim=2, mode='c'] X,
+                                         int[:] labels):
+     """Compute inertia:
+
+     squared distance between each sample and its assigned center.
+     """
+     if floating is float:
+         dtype = np.float32
+     elif floating is double:
+         dtype = np.double
+
+     cdef:
+         int n_samples = X.shape[0]
+         int n_features = X.shape[1]
+         int i, j
+         int n_classes = len(np.unique(labels))
+         np.ndarray[floating, ndim=2] centers = np.zeros([n_classes,
+                                                          n_features],
+                                                         dtype=dtype)
+         np.ndarray[long] num_in_cluster = np.zeros(n_classes, dtype=int)
+         np.ndarray[floating] inertias = np.zeros(n_samples, dtype=dtype)
+     for i in range(n_samples):
+         for j in range(n_features):
+             centers[labels[i], j] += X[i, j]
+         num_in_cluster[labels[i]] += 1
+
+     for i in range(n_classes):
+         for j in range(n_features):
+             centers[i, j] /= num_in_cluster[i]
+
+     for i in range(n_samples):
+         j = labels[i]
+         inertias[i] = _euclidean_dense_dense(&X[i, 0], &centers[j, 0], n_features)
+     return inertias
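For cross-checking: `_kmeans_loss` averages the samples of each cluster to get the centers, then returns each sample's squared distance to its own center. An equivalent pure-NumPy sketch (hypothetical reference helper; it assumes, as the Cython code does, that labels are the integers 0..n_classes-1):

```python
import numpy as np

def kmeans_loss_ref(X, labels):
    # Centers are per-cluster means; the per-sample loss is the
    # squared Euclidean distance to the assigned center.
    centers = np.stack([X[labels == k].mean(axis=0)
                        for k in range(labels.max() + 1)])
    return ((X - centers[labels]) ** 2).sum(axis=1)
```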
+
+
+ # Regression and Classification losses, from scikit-learn.
+
+
+ # ----------------------------------------
+ # Extension Types for Loss Functions
+ # ----------------------------------------
+
+ cdef class LossFunction:
+     """Base class for convex loss functions"""
+
+     cdef double loss(self, double p, double y) nogil:
+         """Evaluate the loss function.
+
+         Parameters
+         ----------
+         p : double
+             The prediction, p = w^T x
+         y : double
+             The true value (aka target)
+
+         Returns
+         -------
+         double
+             The loss evaluated at `p` and `y`.
+         """
+         return 0.
+
+     def py_dloss(self, double p, double y):
+         """Python version of `dloss` for testing.
+
+         Pytest needs a python function and can't use cdef functions.
+         """
+         return self.dloss(p, y)
+
+     def py_loss(self, double p, double y):
+         """Python version of `loss` for testing.
+
+         Pytest needs a python function and can't use cdef functions.
+         """
+         return self.loss(p, y)
+
+     cdef double dloss(self, double p, double y) nogil:
+         """Evaluate the derivative of the loss function with respect to
+         the prediction `p`.
+
+         Parameters
+         ----------
+         p : double
+             The prediction, p = w^T x
+         y : double
+             The true value (aka target)
+
+         Returns
+         -------
+         double
+             The derivative of the loss function with regard to `p`.
+         """
+         return 0.
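Concrete subclasses override `loss` and `dloss`; the `Huber` class whose tail appears in the last hunk keeps a threshold attribute `self.c`, and its derivative saturates at ±c (hence the `return -self.c` branch). A pure-Python sketch of that loss for intuition (the module's real implementation is the Cython extension type):

```python
def huber_loss(p, y, c):
    # Quadratic near the target, linear in the tails.
    r = p - y
    if abs(r) <= c:
        return 0.5 * r * r
    return c * abs(r) - 0.5 * c * c

def huber_dloss(p, y, c):
    # Derivative w.r.t. the prediction p, clipped to [-c, c];
    # the clip is where the `return -self.c` branch comes from.
    r = p - y
    if abs(r) <= c:
        return r
    return c if r > 0 else -c
```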
+
+
+ cdef class Regression(LossFunction):
+     """Base class for loss functions for regression"""

     cdef double loss(self, double p, double y) nogil:
         return 0.
@@ -180,4 +332,4 @@ cdef class Huber(Regression):
         return -self.c

     def __reduce__(self):
-        return Huber, (self.c,)
+        return Huber, (self.c,)
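The `__reduce__` in this hunk is the standard pickling hook for Cython extension types: it hands pickle the class and the constructor arguments needed to rebuild the object. A self-contained demonstration of the same pattern in plain Python (the demo class name is made up):

```python
import pickle

class PickleDemo:
    # Same pattern as Huber.__reduce__: return (callable, args) so
    # pickle rebuilds the object by calling the class again on load.
    def __init__(self, c):
        self.c = c

    def __reduce__(self):
        return PickleDemo, (self.c,)

restored = pickle.loads(pickle.dumps(PickleDemo(1.35)))
assert restored.c == 1.35
```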