Skip to content

Commit 3e483b9

Browse files
bili2002, stanmart, and lbittarello
authored
Fix: Incorrect use of partial in TweedieDistribution._rowwise_gradient_hessian (#889)
* fix
* Add changelog
* fix
* Add tests demonstrating the issue
* Use inv_gaussian_log_* functions when appropriate
* Fix inverse gaussian derivatives
* Update changelog
* Update CHANGELOG.rst

Co-authored-by: Luca Bittarello <15511539+lbittarello@users.noreply.github.com>

* Clarify why we are using the FIM
* Add test against glmnet

---------

Co-authored-by: Martin Stancsics <martin.stancsics@quantco.com>
Co-authored-by: Luca Bittarello <15511539+lbittarello@users.noreply.github.com>
1 parent c3f1bae commit 3e483b9

File tree

5 files changed

+95
-7
lines changed

5 files changed

+95
-7
lines changed

CHANGELOG.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,11 @@ Changelog
1010
UNRELEASED
1111
----------
1212

13+
**Bug fix:**
14+
15+
- Fixed a bug where :meth:`~glum.TweedieDistribution._rowwise_gradient_hessian` and :meth:`~glum.TweedieDistribution._eta_mu_deviance` would call functions with wrong arguments in the ``p = 3`` case.
16+
- Fixed :class:`glum.InverseGaussianDistribution` not using the optimized gradient, Hessian and deviance implementations, as well as those derivatives having the wrong sign.
17+
1318
**Other changes:**
1419

1520
- Build and test with Python 3.13 in CI.

src/glum/_distribution.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -671,8 +671,8 @@ def _rowwise_gradient_hessian(
671671
f = gamma_log_rowwise_gradient_hessian
672672
elif 1 < self.power < 2 and isinstance(link, LogLink):
673673
f = partial(tweedie_log_rowwise_gradient_hessian, p=self.power)
674-
elif self.power == 3:
675-
f = partial(inv_gaussian_log_rowwise_gradient_hessian, p=self.power)
674+
elif self.power == 3 and isinstance(link, LogLink):
675+
f = inv_gaussian_log_rowwise_gradient_hessian
676676

677677
if f is not None:
678678
return f(y, sample_weight, eta, mu, gradient_rows, hessian_rows)
@@ -703,7 +703,7 @@ def _eta_mu_deviance(
703703
elif 1 < self.power < 2 and isinstance(link, LogLink):
704704
f = partial(tweedie_log_eta_mu_deviance, p=self.power)
705705
elif self.power == 3 and isinstance(link, LogLink):
706-
f = partial(inv_gaussian_log_eta_mu_deviance, p=self.power)
706+
f = inv_gaussian_log_eta_mu_deviance
707707

708708
if f is not None:
709709
return f(cur_eta, X_dot_d, y, sample_weight, eta_out, mu_out, factor)
@@ -1153,6 +1153,11 @@ def unit_deviance(self, y, mu): # noqa D
11531153
def _rowwise_gradient_hessian(
11541154
self, link, y, sample_weight, eta, mu, gradient_rows, hessian_rows
11551155
):
1156+
if isinstance(link, LogLink):
1157+
return inv_gaussian_log_rowwise_gradient_hessian(
1158+
y, sample_weight, eta, mu, gradient_rows, hessian_rows
1159+
)
1160+
11561161
return super()._rowwise_gradient_hessian(
11571162
link, y, sample_weight, eta, mu, gradient_rows, hessian_rows
11581163
)
@@ -1169,8 +1174,8 @@ def _eta_mu_deviance(
11691174
mu_out,
11701175
):
11711176
if isinstance(link, LogLink):
1172-
return tweedie_log_eta_mu_deviance(
1173-
cur_eta, X_dot_d, y, sample_weight, eta_out, mu_out, factor, p=3.0
1177+
return inv_gaussian_log_eta_mu_deviance(
1178+
cur_eta, X_dot_d, y, sample_weight, eta_out, mu_out, factor
11741179
)
11751180

11761181
return super()._eta_mu_deviance(

src/glum/_functions.pyx

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -276,8 +276,10 @@ def inv_gaussian_log_rowwise_gradient_hessian(
276276
inv_mu = 1 / mu[i]
277277
inv_mu2 = inv_mu ** 2
278278

279-
gradient_rows_out[i] = 2 * weights[i] * (inv_mu - y[i] * inv_mu2)
280-
hessian_rows_out[i] = 2 * weights[i] * (inv_mu - 2 * y[i] * inv_mu2)
279+
gradient_rows_out[i] = weights[i] * (y[i] * inv_mu2 - inv_mu)
280+
# Use the FIM instead of the true Hessian, as the latter is not
281+
# necessarily positive definite.
282+
hessian_rows_out[i] = weights[i] * inv_mu
281283

282284
def inv_gaussian_log_likelihood(
283285
const_floating1d y,

tests/glm/test_distribution.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,7 @@ def test_deviance_zero(family, chk_values):
170170
(InverseGaussianDistribution(), LogLink()),
171171
(TweedieDistribution(power=1.5), LogLink()),
172172
(TweedieDistribution(power=2.5), LogLink()),
173+
(TweedieDistribution(power=3), LogLink()),
173174
(BinomialDistribution(), LogitLink()),
174175
(TweedieDistribution(power=1.5), TweedieLink(1.5)),
175176
(TweedieDistribution(power=2.5), TweedieLink(2.5)),
@@ -228,6 +229,7 @@ def f(coef2):
228229
(GammaDistribution(), LogLink(), True),
229230
(InverseGaussianDistribution(), LogLink(), False),
230231
(TweedieDistribution(power=1.5), LogLink(), True),
232+
(TweedieDistribution(power=3), LogLink(), False),
231233
(TweedieDistribution(power=4.5), LogLink(), False),
232234
(NegativeBinomialDistribution(theta=1.0), LogLink(), False),
233235
],

tests/glm/test_glm.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1270,6 +1270,80 @@ def test_binomial_cloglog_unregularized(solver):
12701270
assert_allclose(glum_glm.coef_, sm_fit.params[1:], rtol=2e-5)
12711271

12721272

1273+
def test_inv_gaussian_log_enet():
1274+
"""Test elastic net regression with inverse gaussian family and log link.
1275+
1276+
Compare to R's glmnet
1277+
"""
1278+
# library(glmnet)
1279+
# options(digits=10)
1280+
# df <- data.frame(a=c(1,2,3,4,5,6), b=c(0,0,0,0,1,1), y=c(.2,.5,.8,.3,.9,.9))
1281+
# x <- data.matrix(df[,c("a", "b")])
1282+
# y <- df$y
1283+
# fit <- glmnet(
1284+
# x=x, y=y, lambda=1, alpha=0.5, intercept=TRUE,
1285+
# family=inv.gaussian(link=log), standardize=FALSE, thresh=1e-10
1286+
# )
1287+
# coef(fit)
1288+
# s0
1289+
# (Intercept) -1.028655076
1290+
# a 0.123000467
1291+
# b .
1292+
glmnet_intercept = -1.028655076
1293+
glmnet_coef = [0.123000467, 0.0]
1294+
X = np.array([[1, 2, 3, 4, 5, 6], [0, 0, 0, 0, 1, 1]], dtype="float").T
1295+
y = np.array([0.2, 0.5, 0.8, 0.3, 0.9, 0.9])
1296+
rng = np.random.RandomState(42)
1297+
glm = GeneralizedLinearRegressor(
1298+
alpha=1,
1299+
l1_ratio=0.5,
1300+
family="inverse.gaussian",
1301+
link="log",
1302+
solver="irls-cd",
1303+
gradient_tol=1e-8,
1304+
selection="random",
1305+
random_state=rng,
1306+
)
1307+
glm.fit(X, y)
1308+
assert_allclose(glm.intercept_, glmnet_intercept, rtol=1e-3)
1309+
assert_allclose(glm.coef_, glmnet_coef, rtol=1e-3)
1310+
1311+
# same for start_params='zero' and selection='cyclic'
1312+
# with reduced precision
1313+
glm = GeneralizedLinearRegressor(
1314+
alpha=1,
1315+
l1_ratio=0.5,
1316+
family="inverse.gaussian",
1317+
link="log",
1318+
solver="irls-cd",
1319+
gradient_tol=1e-8,
1320+
selection="cyclic",
1321+
)
1322+
glm.fit(X, y)
1323+
assert_allclose(glm.intercept_, glmnet_intercept, rtol=1e-3)
1324+
assert_allclose(glm.coef_, glmnet_coef, rtol=1e-3)
1325+
1326+
# check warm_start, therefore start with different alpha
1327+
glm = GeneralizedLinearRegressor(
1328+
alpha=0.005,
1329+
l1_ratio=0.5,
1330+
family="inverse.gaussian",
1331+
max_iter=300,
1332+
link="log",
1333+
solver="irls-cd",
1334+
gradient_tol=1e-8,
1335+
selection="cyclic",
1336+
)
1337+
glm.fit(X, y)
1338+
# warm start with original alpha and use of sparse matrices
1339+
glm.warm_start = True
1340+
glm.alpha = 1
1341+
X = sparse.csr_matrix(X)
1342+
glm.fit(X, y)
1343+
assert_allclose(glm.intercept_, glmnet_intercept, rtol=1e-3)
1344+
assert_allclose(glm.coef_, glmnet_coef, rtol=1e-3)
1345+
1346+
12731347
@pytest.mark.parametrize("alpha", [0.01, 0.1, 1, 10])
12741348
def test_binomial_enet(alpha):
12751349
"""Test elastic net regression with binomial family and LogitLink.

0 commit comments

Comments (0)