Commit b737a8a

typos
1 parent 0b7df6c


sklearn_extra/robust/_robust_weighted_estimator_helper.pyx

Lines changed: 153 additions & 1 deletion
@@ -1,3 +1,155 @@
+# cython: infer_types=True
+# Fast swap step in the PAM algorithm for k-medoids.
+# Author: Timothée Mathieu
+# License: 3-clause BSD
+
+cimport cython
+import numpy as np
+cimport numpy as np
+
+from sklearn.utils.extmath import row_norms
+from cython cimport floating
+
+import sys
+from time import time
+
+from libc.math cimport exp, log, sqrt, pow, fabs
+from numpy.math cimport INFINITY
+
+
+# Modified from sklearn.cluster._k_means_fast.pyx
+np.import_array()
+
+cdef floating _euclidean_dense_dense(
+        floating* a,  # IN
+        floating* b,  # IN
+        int n_features) nogil:
+    """Squared Euclidean distance between dense arrays a and b."""
+    cdef:
+        int i
+        int n = n_features // 4
+        int rem = n_features % 4
+        floating result = 0
+
+    # We manually unroll the loop for better cache optimization.
+    for i in range(n):
+        result += ((a[0] - b[0]) * (a[0] - b[0])
+                   + (a[1] - b[1]) * (a[1] - b[1])
+                   + (a[2] - b[2]) * (a[2] - b[2])
+                   + (a[3] - b[3]) * (a[3] - b[3]))
+        a += 4; b += 4
+
+    for i in range(rem):
+        result += (a[i] - b[i]) * (a[i] - b[i])
+
+    return result
+
+
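An aside on the helper above: the unrolled loop accumulates (a[i] - b[i])^2 over all features and never takes a square root, so it returns the squared Euclidean distance. A minimal NumPy sketch of the same quantity, for illustration only and not part of this commit:

import numpy as np

def euclidean_dense_dense_ref(a, b):
    # Reference for the unrolled Cython loop: sum of squared
    # coordinate differences, i.e. the squared Euclidean distance.
    d = a - b
    return np.dot(d, d)

a = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
b = np.array([0.0, 2.0, 1.0, 4.0, 7.0])
assert np.isclose(euclidean_dense_dense_ref(a, b), 9.0)  # 1 + 0 + 4 + 0 + 4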
+cpdef np.ndarray[floating] _kmeans_loss(np.ndarray[floating, ndim=2, mode='c'] X,
+                                        int[:] labels):
+    """Compute the inertia of each sample: the squared distance
+    between the sample and its assigned cluster center.
+    """
+    if floating is float:
+        dtype = np.float32
+    elif floating is double:
+        dtype = np.double
+
+    cdef:
+        int n_samples = X.shape[0]
+        int n_features = X.shape[1]
+        int i, j
+        int n_classes = len(np.unique(labels))
+        np.ndarray[floating, ndim=2] centers = np.zeros([n_classes,
+                                                         n_features],
+                                                        dtype=dtype)
+        np.ndarray[long] num_in_cluster = np.zeros(n_classes, dtype=int)
+        np.ndarray[floating] inertias = np.zeros(n_samples, dtype=dtype)
+
+    # Sum the samples belonging to each cluster and count cluster sizes.
+    for i in range(n_samples):
+        for j in range(n_features):
+            centers[labels[i], j] += X[i, j]
+        num_in_cluster[labels[i]] += 1
+
+    # Divide the sums by the counts to obtain the cluster means.
+    for i in range(n_classes):
+        for j in range(n_features):
+            centers[i, j] /= num_in_cluster[i]
+
+    # Squared distance from each sample to its assigned center.
+    for i in range(n_samples):
+        j = labels[i]
+        inertias[i] = _euclidean_dense_dense(&X[i, 0], &centers[j, 0], n_features)
+    return inertias
+
+
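For intuition about _kmeans_loss, here is a NumPy sketch of the same computation, assuming the labels are the consecutive integers 0 .. n_classes - 1 (the np.unique count in the Cython code makes the same assumption). Illustrative only, not part of the commit:

import numpy as np

def kmeans_loss_ref(X, labels):
    # Centers are the per-label means; each sample's inertia is its
    # squared distance to the center of its assigned cluster.
    n_classes = len(np.unique(labels))
    centers = np.stack([X[labels == k].mean(axis=0) for k in range(n_classes)])
    diff = X - centers[labels]
    return np.einsum('ij,ij->i', diff, diff)

X = np.array([[0.0, 0.0], [0.0, 2.0], [10.0, 0.0]])
labels = np.array([0, 0, 1])
print(kmeans_loss_ref(X, labels))  # [1. 1. 0.]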
+# Regression and Classification losses, from scikit-learn.
+
+
+# ----------------------------------------
+# Extension Types for Loss Functions
+# ----------------------------------------
+
+cdef class LossFunction:
+    """Base class for convex loss functions"""
+
+    cdef double loss(self, double p, double y) nogil:
+        """Evaluate the loss function.
+
+        Parameters
+        ----------
+        p : double
+            The prediction, p = w^T x.
+        y : double
+            The true value (aka target).
+
+        Returns
+        -------
+        double
+            The loss evaluated at `p` and `y`.
+        """
+        return 0.
+
+    def py_dloss(self, double p, double y):
+        """Python version of `dloss` for testing.
+
+        Pytest needs a Python function and can't use cdef functions.
+        """
+        return self.dloss(p, y)
+
+    def py_loss(self, double p, double y):
+        """Python version of `loss` for testing.
+
+        Pytest needs a Python function and can't use cdef functions.
+        """
+        return self.loss(p, y)
+
+    cdef double dloss(self, double p, double y) nogil:
+        """Evaluate the derivative of the loss function with respect to
+        the prediction `p`.
+
+        Parameters
+        ----------
+        p : double
+            The prediction, p = w^T x.
+        y : double
+            The true value (aka target).
+
+        Returns
+        -------
+        double
+            The derivative of the loss function with respect to `p`.
+        """
+        return 0.
+
+
+cdef class Regression(LossFunction):
+    """Base class for loss functions for regression"""
 
     cdef double loss(self, double p, double y) nogil:
         return 0.
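To see how the extension-type hierarchy is meant to be used: a concrete subclass overrides loss and dloss. Below is a hypothetical squared-error loss written in plain Python to show the contract; the actual losses in this file (such as Huber in the next hunk) are Cython cdef classes:

class SquaredLossSketch:
    # Hypothetical illustration of the LossFunction contract:
    # loss(p, y) evaluates the loss, dloss(p, y) its derivative in p.
    def loss(self, p, y):
        return 0.5 * (p - y) ** 2

    def dloss(self, p, y):
        return p - y

sq = SquaredLossSketch()
assert sq.loss(3.0, 1.0) == 2.0   # 0.5 * (3 - 1)**2
assert sq.dloss(3.0, 1.0) == 2.0  # 3 - 1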
@@ -180,4 +332,4 @@ cdef class Huber(Regression):
         return -self.c
 
     def __reduce__(self):
-        return Huber, (self.c,)
+        return Huber, (self.c,)
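A note on the __reduce__ hunk: Cython cdef classes are not picklable through __dict__ by default, so __reduce__ hands pickle a recipe, the class plus the constructor arguments needed to rebuild an equivalent object. A plain-Python sketch of the same contract (HuberLike and the value 1.35 are hypothetical stand-ins):

import pickle

class HuberLike:
    # Stand-in showing the __reduce__ protocol used by Huber above:
    # pickle calls __reduce__, receives (callable, args), and rebuilds
    # the object as callable(*args).
    def __init__(self, c):
        self.c = c

    def __reduce__(self):
        return HuberLike, (self.c,)

h = pickle.loads(pickle.dumps(HuberLike(1.35)))
assert h.c == 1.35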
