Skip to content

Commit 3e483b9

Browse files
bili2002, stanmart, and lbittarello
authored
Fix: Incorrect use of partial in TweedieDistribution._rowwise_gradient_hessian (#889)
* fix
* Add changelog
* fix
* Add tests demonstrating the issue
* Use inv_gaussian_log_* functions when appropriate
* Fix inverse gaussian derivatives
* Update changelog
* Update CHANGELOG.rst

Co-authored-by: Luca Bittarello <15511539+lbittarello@users.noreply.github.com>

* Clarify why we are using the FIM
* Add test against glmnet

---------

Co-authored-by: Martin Stancsics <martin.stancsics@quantco.com>
Co-authored-by: Luca Bittarello <15511539+lbittarello@users.noreply.github.com>
1 parent c3f1bae commit 3e483b9

File tree

5 files changed

+95
-7
lines changed

5 files changed

+95
-7
lines changed

CHANGELOG.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,11 @@ Changelog
1010
UNRELEASED
1111
----------
1212

13+
**Bug fix:**
14+
15+
- Fixed a bug where :meth:`~glum.TweedieDistribution._rowwise_gradient_hessian` and :meth:`~glum.TweedieDistribution._eta_mu_deviance` would call functions with wrong arguments in the ``p = 3`` case.
16+
- Fixed :class:`glum.InverseGaussianDistribution` not using the optimized gradient, Hessian and deviance implementations, as well as those derivatives having the wrong sign.
17+
1318
**Other changes:**
1419

1520
- Build and test with Python 3.13 in CI.

src/glum/_distribution.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -671,8 +671,8 @@ def _rowwise_gradient_hessian(
671671
f = gamma_log_rowwise_gradient_hessian
672672
elif 1 < self.power < 2 and isinstance(link, LogLink):
673673
f = partial(tweedie_log_rowwise_gradient_hessian, p=self.power)
674-
elif self.power == 3:
675-
f = partial(inv_gaussian_log_rowwise_gradient_hessian, p=self.power)
674+
elif self.power == 3 and isinstance(link, LogLink):
675+
f = inv_gaussian_log_rowwise_gradient_hessian
676676

677677
if f is not None:
678678
return f(y, sample_weight, eta, mu, gradient_rows, hessian_rows)
@@ -703,7 +703,7 @@ def _eta_mu_deviance(
703703
elif 1 < self.power < 2 and isinstance(link, LogLink):
704704
f = partial(tweedie_log_eta_mu_deviance, p=self.power)
705705
elif self.power == 3 and isinstance(link, LogLink):
706-
f = partial(inv_gaussian_log_eta_mu_deviance, p=self.power)
706+
f = inv_gaussian_log_eta_mu_deviance
707707

708708
if f is not None:
709709
return f(cur_eta, X_dot_d, y, sample_weight, eta_out, mu_out, factor)
@@ -1153,6 +1153,11 @@ def unit_deviance(self, y, mu): # noqa D
11531153
def _rowwise_gradient_hessian(
11541154
self, link, y, sample_weight, eta, mu, gradient_rows, hessian_rows
11551155
):
1156+
if isinstance(link, LogLink):
1157+
return inv_gaussian_log_rowwise_gradient_hessian(
1158+
y, sample_weight, eta, mu, gradient_rows, hessian_rows
1159+
)
1160+
11561161
return super()._rowwise_gradient_hessian(
11571162
link, y, sample_weight, eta, mu, gradient_rows, hessian_rows
11581163
)
@@ -1169,8 +1174,8 @@ def _eta_mu_deviance(
11691174
mu_out,
11701175
):
11711176
if isinstance(link, LogLink):
1172-
return tweedie_log_eta_mu_deviance(
1173-
cur_eta, X_dot_d, y, sample_weight, eta_out, mu_out, factor, p=3.0
1177+
return inv_gaussian_log_eta_mu_deviance(
1178+
cur_eta, X_dot_d, y, sample_weight, eta_out, mu_out, factor
11741179
)
11751180

11761181
return super()._eta_mu_deviance(

src/glum/_functions.pyx

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -276,8 +276,10 @@ def inv_gaussian_log_rowwise_gradient_hessian(
276276
inv_mu = 1 / mu[i]
277277
inv_mu2 = inv_mu ** 2
278278

279-
gradient_rows_out[i] = 2 * weights[i] * (inv_mu - y[i] * inv_mu2)
280-
hessian_rows_out[i] = 2 * weights[i] * (inv_mu - 2 * y[i] * inv_mu2)
279+
gradient_rows_out[i] = weights[i] * (y[i] * inv_mu2 - inv_mu)
280+
# Use the FIM instead of the true Hessian, as the latter is not
281+
# necessarily positive definite.
282+
hessian_rows_out[i] = weights[i] * inv_mu
281283

282284
def inv_gaussian_log_likelihood(
283285
const_floating1d y,

tests/glm/test_distribution.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,7 @@ def test_deviance_zero(family, chk_values):
170170
(InverseGaussianDistribution(), LogLink()),
171171
(TweedieDistribution(power=1.5), LogLink()),
172172
(TweedieDistribution(power=2.5), LogLink()),
173+
(TweedieDistribution(power=3), LogLink()),
173174
(BinomialDistribution(), LogitLink()),
174175
(TweedieDistribution(power=1.5), TweedieLink(1.5)),
175176
(TweedieDistribution(power=2.5), TweedieLink(2.5)),
@@ -228,6 +229,7 @@ def f(coef2):
228229
(GammaDistribution(), LogLink(), True),
229230
(InverseGaussianDistribution(), LogLink(), False),
230231
(TweedieDistribution(power=1.5), LogLink(), True),
232+
(TweedieDistribution(power=3), LogLink(), False),
231233
(TweedieDistribution(power=4.5), LogLink(), False),
232234
(NegativeBinomialDistribution(theta=1.0), LogLink(), False),
233235
],

tests/glm/test_glm.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1270,6 +1270,80 @@ def test_binomial_cloglog_unregularized(solver):
12701270
assert_allclose(glum_glm.coef_, sm_fit.params[1:], rtol=2e-5)
12711271

12721272

1273+
def test_inv_gaussian_log_enet():
1274+
"""Test elastic net regression with inverse gaussian family and log link.
1275+
1276+
Compare to R's glmnet
1277+
"""
1278+
# library(glmnet)
1279+
# options(digits=10)
1280+
# df <- data.frame(a=c(1,2,3,4,5,6), b=c(0,0,0,0,1,1), y=c(.2,.5,.8,.3,.9,.9))
1281+
# x <- data.matrix(df[,c("a", "b")])
1282+
# y <- df$y
1283+
# fit <- glmnet(
1284+
# x=x, y=y, lambda=1, alpha=0.5, intercept=TRUE,
1285+
# family=inv.gaussian(link=log), standardize=FALSE, thresh=1e-10
1286+
# )
1287+
# coef(fit)
1288+
# s0
1289+
# (Intercept) -1.028655076
1290+
# a 0.123000467
1291+
# b .
1292+
glmnet_intercept = -1.028655076
1293+
glmnet_coef = [0.123000467, 0.0]
1294+
X = np.array([[1, 2, 3, 4, 5, 6], [0, 0, 0, 0, 1, 1]], dtype="float").T
1295+
y = np.array([0.2, 0.5, 0.8, 0.3, 0.9, 0.9])
1296+
rng = np.random.RandomState(42)
1297+
glm = GeneralizedLinearRegressor(
1298+
alpha=1,
1299+
l1_ratio=0.5,
1300+
family="inverse.gaussian",
1301+
link="log",
1302+
solver="irls-cd",
1303+
gradient_tol=1e-8,
1304+
selection="random",
1305+
random_state=rng,
1306+
)
1307+
glm.fit(X, y)
1308+
assert_allclose(glm.intercept_, glmnet_intercept, rtol=1e-3)
1309+
assert_allclose(glm.coef_, glmnet_coef, rtol=1e-3)
1310+
1311+
# same for start_params='zero' and selection='cyclic'
1312+
# with reduced precision
1313+
glm = GeneralizedLinearRegressor(
1314+
alpha=1,
1315+
l1_ratio=0.5,
1316+
family="inverse.gaussian",
1317+
link="log",
1318+
solver="irls-cd",
1319+
gradient_tol=1e-8,
1320+
selection="cyclic",
1321+
)
1322+
glm.fit(X, y)
1323+
assert_allclose(glm.intercept_, glmnet_intercept, rtol=1e-3)
1324+
assert_allclose(glm.coef_, glmnet_coef, rtol=1e-3)
1325+
1326+
# check warm_start, therefore start with different alpha
1327+
glm = GeneralizedLinearRegressor(
1328+
alpha=0.005,
1329+
l1_ratio=0.5,
1330+
family="inverse.gaussian",
1331+
max_iter=300,
1332+
link="log",
1333+
solver="irls-cd",
1334+
gradient_tol=1e-8,
1335+
selection="cyclic",
1336+
)
1337+
glm.fit(X, y)
1338+
# warm start with original alpha and use of sparse matrices
1339+
glm.warm_start = True
1340+
glm.alpha = 1
1341+
X = sparse.csr_matrix(X)
1342+
glm.fit(X, y)
1343+
assert_allclose(glm.intercept_, glmnet_intercept, rtol=1e-3)
1344+
assert_allclose(glm.coef_, glmnet_coef, rtol=1e-3)
1345+
1346+
12731347
@pytest.mark.parametrize("alpha", [0.01, 0.1, 1, 10])
12741348
def test_binomial_enet(alpha):
12751349
"""Test elastic net regression with binomial family and LogitLink.

0 commit comments

Comments (0)