@@ -232,3 +232,105 @@ def full_grad_sparse(
QuadraticSVC, QuadraticSVC_32 = jit_factory(_QuadraticSVC, spec_quadratic_svc)
+
+
+spec_huber = [
+    ('delta', float64),
+    ('lipschitz', float64[:])
+]
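+# The spec above lists the typed attributes required by Numba's @jitclass:
+# `delta` is a float64 scalar, `lipschitz` a 1-D float64 array.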
+
+
+class _Huber(BaseDatafit):
+    """Huber datafit.
+
+    The datafit reads::
+
+        (1 / n_samples) * sum_{i=1}^{n_samples} f(y_i - Xw_i)
+
+    where f is the Huber function::
+
+        f(x) = 1/2 * x^2                      if |x| <= delta
+               delta * |x| - 1/2 * delta^2    if |x| > delta
+
+    Attributes
+    ----------
+    lipschitz : array, shape (n_features,)
+        The coordinatewise gradient Lipschitz constants.
+
+    Note
+    ----
+    The class _Huber is subsequently decorated with @jitclass, through the
+    `jit_factory` function, to be compiled. This allows for faster
+    computations using the Numba JIT compiler.
+    """
+
+    def __init__(self, delta):
+        self.delta = delta
+
+    def initialize(self, X, y):
+        n_features = X.shape[1]
+        self.lipschitz = np.zeros(n_features, dtype=X.dtype)
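+        # The Huber function is 1-smooth, so the gradient of the datafit
+        # w.r.t. coordinate j is Lipschitz with constant ||X[:, j]||^2 / n_samples.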
+        for j in range(n_features):
+            self.lipschitz[j] = (X[:, j] ** 2).sum() / len(y)
+
+    def initialize_sparse(
+            self, X_data, X_indptr, X_indices, y):
+        n_features = len(X_indptr) - 1
+        self.lipschitz = np.zeros(n_features, dtype=X_data.dtype)
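+        # X is passed in CSC format: the nonzero entries of column j live in
+        # X_data[X_indptr[j]:X_indptr[j + 1]].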
+        for j in range(n_features):
+            nrm2 = 0.
+            for idx in range(X_indptr[j], X_indptr[j + 1]):
+                nrm2 += X_data[idx] ** 2
+            self.lipschitz[j] = nrm2 / len(y)
+
+    def value(self, y, w, Xw):
+        n_samples = len(y)
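+        # Accumulate f(y_i - Xw_i): quadratic below delta, linear above,
+        # with the -delta^2 / 2 offset keeping f continuous at |x| = delta.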
+        res = 0.
+        for i in range(n_samples):
+            tmp = abs(y[i] - Xw[i])
+            if tmp < self.delta:
+                res += 0.5 * tmp ** 2
+            else:
+                res += self.delta * tmp - 0.5 * self.delta ** 2
+        return res / n_samples
297
+
298
+ def gradient_scalar (self , X , y , w , Xw , j ):
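+        # With residual r_i = y_i - Xw_i, f'(r) = r if |r| < delta and
+        # delta * sign(r) otherwise, so the j-th partial derivative is
+        # -(1 / n_samples) * sum_i X[i, j] * f'(r_i).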
+        n_samples = len(y)
+        grad_j = 0.
+        for i in range(n_samples):
+            tmp = y[i] - Xw[i]
+            if abs(tmp) < self.delta:
+                grad_j += - X[i, j] * tmp
+            else:
+                grad_j += - X[i, j] * np.sign(tmp) * self.delta
+        return grad_j / n_samples
+
+    def gradient_scalar_sparse(self, X_data, X_indptr, X_indices, y, Xw, j):
+        grad_j = 0.
+        for i in range(X_indptr[j], X_indptr[j + 1]):
+            tmp = y[X_indices[i]] - Xw[X_indices[i]]
+            if np.abs(tmp) < self.delta:
+                grad_j += - X_data[i] * tmp
+            else:
+                grad_j += - X_data[i] * np.sign(tmp) * self.delta
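+        # len(Xw) equals n_samples, so this matches the dense gradient_scalar.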
+        return grad_j / len(Xw)
+
+    def full_grad_sparse(
+            self, X_data, X_indptr, X_indices, y, Xw):
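+        # Full gradient over all coordinates: the same per-column accumulation
+        # as gradient_scalar_sparse, stored into a dense array.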
+        n_features = X_indptr.shape[0] - 1
+        n_samples = y.shape[0]
+        grad = np.zeros(n_features, dtype=Xw.dtype)
+        for j in range(n_features):
+            grad_j = 0.
+            for i in range(X_indptr[j], X_indptr[j + 1]):
+                tmp = y[X_indices[i]] - Xw[X_indices[i]]
+                if np.abs(tmp) < self.delta:
+                    grad_j += - X_data[i] * tmp
+                else:
+                    grad_j += - X_data[i] * np.sign(tmp) * self.delta
+            grad[j] = grad_j / n_samples
+        return grad
+
+
+Huber, Huber_32 = jit_factory(_Huber, spec_huber)
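A quick sanity check of the new datafit, as a sketch: it assumes NumPy is
available and that the compiled `Huber` class from this module is importable;
the random data and delta value are illustrative only::

    import numpy as np

    np.random.seed(0)
    X = np.random.randn(10, 3)
    y = np.random.randn(10)
    w = np.random.randn(3)
    Xw = X @ w
    delta = 1.0

    datafit = Huber(delta)
    datafit.initialize(X, y)

    # Direct NumPy evaluation of (1 / n_samples) * sum_i f(y_i - Xw_i).
    r = np.abs(y - Xw)
    expected = np.where(r < delta, 0.5 * r ** 2,
                        delta * r - 0.5 * delta ** 2).mean()
    assert np.isclose(datafit.value(y, w, Xw), expected)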