
Commit 54b43ae

slightly optimise gradient calculations
1 parent 9896769 commit 54b43ae

2 files changed: +107 −48 lines

slise/optimisation.py

Lines changed: 100 additions & 42 deletions
@@ -8,7 +8,7 @@

 import numpy as np
 from lbfgs import LBFGSError, fmin_lbfgs
-from numba import jit, get_num_threads, set_num_threads, threading_layer
+from numba import jit, get_num_threads, set_num_threads, threading_layer, float64
 from scipy.optimize import brentq

 from slise.utils import (
@@ -47,16 +47,16 @@ def loss_smooth(
         float: Loss value.
     """
     epsilon *= epsilon
-    distances = ((X @ alpha) - Y) ** 2
-    subset = sigmoid(beta * (epsilon - distances))
+    residual2 = ((X @ alpha) - Y) ** 2
+    subset = sigmoid(beta * (epsilon - residual2))
     loss = 0.0
     if weight is None:
-        residuals = np.minimum(0, distances - epsilon * len(Y))
-        loss += np.sum(subset * residuals) / len(Y)
+        residual2 = np.minimum(0, residual2 - epsilon * len(Y))
+        loss += np.sum(subset * residual2) / len(Y)
     else:
         sumw = np.sum(weight)
-        residuals = np.minimum(0, distances - epsilon * sumw)
-        loss += np.sum(subset * residuals * weight) / sumw
+        residual2 = np.minimum(0, residual2 - epsilon * sumw)
+        loss += np.sum(subset * residual2 * weight) / sumw
     if lambda1 > 0:
         loss += lambda1 * np.sum(np.abs(alpha))
     if lambda2 > 0:
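
Reading the loss directly off the hunk above (a restatement of the code, not taken from the package documentation): with residuals r_i = x_i^T alpha - y_i, weights w_i (all equal to 1 when weight is None) and W = sum_i w_i, loss_smooth computes

    L(\alpha) = \frac{1}{W} \sum_i w_i \, \sigma\big(\beta(\varepsilon^2 - r_i^2)\big) \, \min\big(0,\; r_i^2 - \varepsilon^2 W\big) + \lambda_1 \lVert\alpha\rVert_1 + \lambda_2 \lVert\alpha\rVert_2^2

where \sigma is the logistic sigmoid and the regularisation terms follow the lambda1/lambda2 branches. The rename from distances to residual2 is purely cosmetic; the computed value is unchanged.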
@@ -98,12 +98,12 @@ def loss_residuals(
     """
     subset = 1 / (1 + np.exp(-beta * (epsilon2 - residuals2)))  # Sigmoid
     if weight is None:
-        residuals = np.minimum(0, residuals2 - epsilon2 * len(residuals2))
-        loss = np.sum(subset * residuals) / len(residuals2)
+        residual2 = np.minimum(0, residuals2 - epsilon2 * len(residuals2))
+        loss = np.sum(subset * residual2) / len(residuals2)
     else:
         sumw = np.sum(weight)
-        residuals = np.minimum(0, residuals2 - epsilon2 * sumw)
-        loss = np.sum(subset * residuals * weight) / sumw
+        residual2 = np.minimum(0, residuals2 - epsilon2 * sumw)
+        loss = np.sum(subset * residual2 * weight) / sumw
     if lambda1 > 0:
         loss += lambda1 * np.sum(np.abs(alpha))
     if lambda2 > 0:
@@ -135,13 +135,13 @@ def loss_sharp(
         float: Loss value.
     """
     epsilon *= epsilon
-    distances = (Y - mat_mul_inter(X, alpha)) ** 2
+    residual2 = (Y - mat_mul_inter(X, alpha)) ** 2
     if weight is None:
-        loss = np.sum(distances[distances <= epsilon] - (epsilon * len(Y))) / len(Y)
+        loss = np.sum(residual2[residual2 <= epsilon] - (epsilon * len(Y))) / len(Y)
     else:
         sumw = np.sum(weight)
-        mask = distances <= epsilon
-        loss = np.sum((distances[mask] - (epsilon * sumw)) * weight[mask]) / sumw
+        mask = residual2 <= epsilon
+        loss = np.sum((residual2[mask] - (epsilon * sumw)) * weight[mask]) / sumw
     if lambda1 > 0:
         loss += lambda1 * np.sum(np.abs(alpha))
     if lambda2 > 0:
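
loss_sharp is the beta -> infinity limit of the smoothed loss above (again read off the code, not quoted from the documentation): the sigmoid collapses to the indicator 1{r_i^2 <= eps^2}, giving

    L(\alpha) = \frac{1}{W} \sum_{i:\, r_i^2 \le \varepsilon^2} w_i \big(r_i^2 - \varepsilon^2 W\big) + \lambda_1 \lVert\alpha\rVert_1 + \lambda_2 \lVert\alpha\rVert_2^2

which is why the tests further down compare loss_smooth with beta=1000000 against loss_sharp.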
@@ -164,9 +164,9 @@ def loss_numba(
     epsilon: float,
     beta: float,
     lambda2: float,
-    weight: Optional[np.ndarray] = None,
 ) -> Tuple[float, np.ndarray]:
     """Smoothed version of the SLISE loss ([slise.optimisation.loss_smooth][]), that also calculates the gradient.
+    For a variant with weights see ([slise.optimisation.loss_numbaw][]).
     _This function is sped up with numba._

     Args:
@@ -176,31 +176,89 @@ def loss_numba(
         epsilon (float): Error tolerance.
         beta (float): Sigmoid steepness.
         lambda2 (float): Ridge/L2 regularisation coefficient.
-        weight (Optional[np.ndarray], optional): Weight vector for the data items. Defaults to None.

     Returns:
         Tuple[float, np.ndarray]: Loss value and gradient vector.
     """
     epsilon *= epsilon
-    distances = (X @ alpha) - Y
-    distances2 = distances**2
+    residuals = (X @ alpha) - Y
+    residual2 = residuals**2
+    n = residuals.dtype.type(len(Y))
     # Loss
-    subset = 1 / (1 + np.exp(-beta * (epsilon - distances2)))  # Sigmoid
-    n = len(Y) if weight is None else np.sum(weight)
-    residuals = np.minimum(0, distances2 - (epsilon * n))
-    if weight is None:
-        loss = np.sum(subset * residuals) / n
-    else:
-        loss = np.sum(subset * residuals * weight) / n
+    subset = 1 / (1 + np.exp(-beta * (epsilon - residual2)))  # Sigmoid
+    residual2 = np.minimum(0, residual2 - (epsilon * n))
+    loss = np.sum(subset * residual2) / n
     # Gradient
-    k1 = 2.0 / n
-    k2 = (-2.0 * beta / n) * (subset - subset**2)
-    distances[residuals == 0] = 0.0
-    if weight is None:
-        grad = ((subset * k1) + (residuals * k2)) * distances
-    else:
-        grad = ((subset * k1) + (residuals * k2)) * (distances * weight)
-    grad = np.expand_dims(grad, 0) @ X
+    grad = (
+        np.expand_dims(
+            subset
+            * residuals
+            * (2.0 / n - residual2 * (2.0 * beta / n) * (1.0 - subset))
+            * (residual2 < 0.0).astype(X.dtype),
+            0,
+        )
+        @ X
+    )
+    # Lambda
+    if lambda2 > 0:
+        loss = loss + lambda2 * np.sum(alpha * alpha)
+        grad = grad + (lambda2 * 2) * alpha
+    return loss, grad
+
+
+@jit(
+    nopython=True,
+    fastmath=True,
+    parallel=True,
+    cache=True,
+    nogil=True,
+    boundscheck=False,
+)
+def loss_numbaw(
+    alpha: np.ndarray,
+    X: np.ndarray,
+    Y: np.ndarray,
+    epsilon: float,
+    beta: float,
+    lambda2: float,
+    weight: np.ndarray,
+) -> Tuple[float, np.ndarray]:
+    """Smoothed version of the SLISE loss ([slise.optimisation.loss_smooth][]), that also calculates the gradient.
+    For a variant without weights see ([slise.optimisation.loss_numba][]).
+    _This function is sped up with numba._
+
+    Args:
+        alpha (np.ndarray): Linear model coefficients.
+        X (np.ndarray): Data matrix.
+        Y (np.ndarray): Response vector.
+        epsilon (float): Error tolerance.
+        beta (float): Sigmoid steepness.
+        lambda2 (float): Ridge/L2 regularisation coefficient.
+        weight (Optional[np.ndarray]): Weight vector for the data items.
+
+    Returns:
+        Tuple[float, np.ndarray]: Loss value and gradient vector.
+    """
+    epsilon *= epsilon
+    residuals = (X @ alpha) - Y
+    residual2 = residuals**2
+    n = np.sum(weight)
+    # Loss
+    subset = 1 / (1 + np.exp(-beta * (epsilon - residual2)))  # Sigmoid
+    residual2 = np.minimum(0, residual2 - (epsilon * n))
+    loss = np.sum(subset * residual2 * weight) / n
+    # Gradient
+    grad = (
+        np.expand_dims(
+            subset
+            * residuals
+            * weight
+            * (2.0 / n - residual2 * (2.0 * beta / n) * (1.0 - subset))
+            * (residual2 < 0.0).astype(X.dtype),
+            0,
+        )
+        @ X
+    )
     # Lambda
     if lambda2 > 0:
         loss = loss + lambda2 * np.sum(alpha * alpha)
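
A sketch of why the fused gradient expression matches the old k1/k2 formulation (my own derivation from the code in this hunk, not text from the repository). With r_i = x_i^T alpha - y_i, s_i = \sigma(\beta(\varepsilon^2 - r_i^2)) and \varphi_i = \min(0,\, r_i^2 - \varepsilon^2 n), the unregularised loss is L = \frac{1}{n} \sum_i s_i \varphi_i. Using \sigma' = \sigma(1 - \sigma) and the fact that \min(0, \cdot) has derivative \mathbf{1}\{\cdot < 0\},

    \frac{\partial L}{\partial \alpha} = \sum_i s_i r_i \Big( \frac{2}{n} \mathbf{1}\{\varphi_i < 0\} - \frac{2\beta}{n} (1 - s_i)\, \varphi_i \Big) x_i .

The second term already vanishes whenever \varphi_i = 0, so multiplying the whole bracket by (residual2 < 0.0) is equivalent to the old distances[residuals == 0] = 0.0 masking. That is what lets the k1/k2 temporaries and the if weight is None branches be fused into a single vectorised expression; the weighted kernel loss_numbaw only inserts w_i into the sums and uses n = \sum_i w_i.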
@@ -340,12 +398,11 @@ def optimise_loss(
     Returns:
         np.ndarray: The coefficients of the linear model.
     """
-    return owlqn(
-        lambda alpha: loss_numba(alpha, X, Y, epsilon, beta, lambda2, weight),
-        alpha,
-        lambda1,
-        max_iterations,
-    )
+    if weight is None:
+        lf = lambda alpha: loss_numba(alpha, X, Y, epsilon, beta, lambda2)
+    else:
+        lf = lambda alpha: loss_numbaw(alpha, X, Y, epsilon, beta, lambda2, weight)
+    return owlqn(lf, alpha, lambda1, max_iterations)


 def log_approximation_ratio(
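
Because of this dispatch, the weighted kernel should reduce exactly to the unweighted one when every weight is 1 (then sum(w) == len(Y) and the w_i factors drop out). A minimal sanity-check sketch, not part of the commit; the array sizes, the epsilon/beta/lambda2 values, and the slise.optimisation import path mirror the test file below and are otherwise assumptions:

import numpy as np
from slise.optimisation import loss_numba, loss_numbaw

rng = np.random.default_rng(0)
X = rng.normal(size=(50, 3))   # data matrix
Y = rng.normal(size=50)        # response vector
alpha = rng.normal(size=3)     # linear model coefficients
w = np.ones(50)                # uniform weights

# With w_i = 1 both kernels should return the same loss and gradient.
loss_u, grad_u = loss_numba(alpha, X, Y, 0.5, beta=20.0, lambda2=0.1)
loss_w, grad_w = loss_numbaw(alpha, X, Y, 0.5, beta=20.0, lambda2=0.1, weight=w)
assert np.isclose(loss_u, loss_w)
assert np.allclose(grad_u, grad_w)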
@@ -548,7 +605,7 @@ def set_threads(num: int = -1) -> int:
     """Set the number of numba threads.

     Args:
-        num (int, optional): The number of threads. Defaults to -1.
+        num (int, optional): The number of threads (or -1 to keep the old value). Defaults to -1.

     Returns:
         int: The old number of theads (or -1 if unchanged).
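
A minimal usage sketch of the contract documented above (my reading of the docstring; the placeholder body is not from the commit):

from slise.optimisation import set_threads

old = set_threads(2)      # limit numba to 2 threads, remember the previous count
try:
    ...                   # run the optimisation here
finally:
    if old > 0:
        set_threads(old)  # restore the previous number of threads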
@@ -579,7 +636,8 @@ def check_threading_layer():
     try:
         if threading_layer() == "workqueue":
             warn(
-                'Using `numba.threading_layer()=="workqueue"` can be devastatingly slow! See https://numba.pydata.org/numba-doc/latest/user/threading-layer.html for alternatives.',
+                'Using `numba.threading_layer()=="workqueue"` can be devastatingly slow!'
+                " See https://numba.pydata.org/numba-doc/latest/user/threading-layer.html for alternatives.",
                 SliseWarning,
             )
     except ValueError as e:
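
The warning above fires when numba falls back to its slow "workqueue" threading layer. One way to request a faster layer is numba's NUMBA_THREADING_LAYER environment variable (a generic numba mechanism, not something added by this commit); it has to be set before numba is first imported, and "omp"/"tbb" only take effect if the corresponding runtime is installed:

import os
os.environ.setdefault("NUMBA_THREADING_LAYER", "omp")  # or "tbb"

from slise.optimisation import check_threading_layer  # importing slise pulls in numba
check_threading_layer()  # should no longer warn once the requested layer is available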

tests/test_optim.py

Lines changed: 7 additions & 6 deletions
@@ -6,6 +6,7 @@
     graduated_optimisation,
     log_approximation_ratio,
     loss_numba,
+    loss_numbaw,
     loss_sharp,
     loss_smooth,
     matching_epsilon,
@@ -61,10 +62,10 @@ def test_loss():
     # With weight
     assert loss_smooth(alpha, X, Y, 0.1, weight=w) <= 0
     assert loss_sharp(alpha, X, Y, 0.1, weight=w) <= 0
-    assert loss_numba(alpha, X, Y, 0.1, lambda2=0, beta=0, weight=w)[0] <= 0
+    assert loss_numbaw(alpha, X, Y, 0.1, lambda2=0, beta=0, weight=w)[0] <= 0
     assert loss_smooth(alpha, X, Y, 10, weight=w) < 0
     assert loss_sharp(alpha, X, Y, 10, weight=w) < 0
-    assert loss_numba(alpha, X, Y, 10, lambda2=0, beta=0, weight=w)[0] < 0
+    assert loss_numbaw(alpha, X, Y, 10, lambda2=0, beta=0, weight=w)[0] < 0
     assert np.allclose(
         loss_smooth(alpha, X, Y, 0.1, beta=1000000, weight=w),
         loss_sharp(alpha, X, Y, 0.1, weight=w),
@@ -78,10 +79,10 @@ def test_loss():
         loss_sharp(alpha, X, Y, 0.1, lambda2=0.5, weight=w),
     )
     assert loss_smooth(alpha, X, Y, 0.1, beta=20, weight=w, lambda2=0.0) == approx(
-        loss_numba(alpha, X, Y, 0.1, lambda2=0.0, weight=w, beta=20)[0], 1e-8
+        loss_numbaw(alpha, X, Y, 0.1, lambda2=0.0, weight=w, beta=20)[0], 1e-8
     )
     assert loss_smooth(alpha, X, Y, 0.1, beta=20, weight=w, lambda2=0.5) == approx(
-        loss_numba(alpha, X, Y, 0.1, lambda2=0.5, weight=w, beta=20)[0], 1e-8
+        loss_numbaw(alpha, X, Y, 0.1, lambda2=0.5, weight=w, beta=20)[0], 1e-8
     )

@@ -96,9 +97,9 @@ def test_grad_numerically():
     )
     assert np.allclose(grad, grad2, atol=1e-4)
     w = np.random.uniform(size=20)
-    _, grad = loss_numba(alpha, X, Y, 10, lambda2=0, beta=0, weight=w)
+    _, grad = loss_numbaw(alpha, X, Y, 10, lambda2=0, beta=0, weight=w)
     grad2 = numeric_grad(
-        alpha, lambda x: loss_numba(x, X, Y, 10, lambda2=0, beta=0, weight=w)[0]
+        alpha, lambda x: loss_numbaw(x, X, Y, 10, lambda2=0, beta=0, weight=w)[0]
     )
     assert np.allclose(grad, grad2, atol=1e-4)
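
The numeric_grad helper used in this test is defined elsewhere in the test suite and is not part of this diff. A central-difference sketch of what such a helper could look like (the signature and step size are assumptions):

import numpy as np

def numeric_grad(x, f, h=1e-6):
    """Approximate the gradient of f at x with central differences."""
    grad = np.zeros_like(x)
    for i in range(len(x)):
        step = np.zeros_like(x)
        step[i] = h
        grad[i] = (f(x + step) - f(x - step)) / (2 * h)
    return grad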
