Skip to content

Commit 859330a

Browse files
committed
getting hessian to work for simple formulation of MLE
1 parent 1062ffc commit 859330a

File tree

7 files changed

+432
-23
lines changed

7 files changed

+432
-23
lines changed

cvxpy/atoms/affine/binary_operators.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import numpy as np
2222
import scipy.sparse as sp
2323

24+
from cvxpy.expressions.variable import Variable
2425
import cvxpy.lin_ops.lin_op as lo
2526
import cvxpy.lin_ops.lin_utils as lu
2627
import cvxpy.utilities as u
@@ -206,6 +207,32 @@ def _grad(self, values):
206207
DY = sp.kron(sp.eye(p, format='csc'), X, format='csc').T
207208

208209
return [DX, DY]
210+
211+
def _hess(self, values):
    """Compute second-derivative information for z = x * y (elementwise).

    Two distinct cases are handled:

    * If either argument is a ``Variable``, a dict keyed by argument
      pairs is returned, marking the nonzero cross-partials
      d2z/(dx dy) and d2z/(dy dx) with a placeholder weight of 1.0.
      (The pure second derivatives d2z/dx2 and d2z/dy2 are zero and
      are omitted.)
    * Otherwise both arguments are numeric and the per-argument
      curvature is returned as diagonal matrices: diag(y) for x and
      diag(x) for y.

    Args:
        values: A list of numeric values for the arguments [x, y].

    Returns:
        Either a dict {(arg_i, arg_j): 1.0} of cross-partial markers,
        or a list of SciPy CSC sparse matrices [D2X, D2Y].
    """
    if isinstance(self.args[0], Variable) or isinstance(self.args[1], Variable):
        # Only the mixed partials are nonzero for a bilinear product.
        return {(self.args[0], self.args[1]): 1.0,
                (self.args[1], self.args[0]): 1.0}
    # Flatten in case inputs are not 1D; Fortran order matches the
    # column-major vectorization used by the gradient code.
    x = np.asarray(values[0]).flatten(order='F')
    y = np.asarray(values[1]).flatten(order='F')
    D2X = sp.diags(y, format='csc')
    D2Y = sp.diags(x, format='csc')
    return [D2X, D2Y]
209236

210237
def graph_implementation(
211238
self, arg_objs, shape: Tuple[int, ...], data=None
@@ -319,6 +346,29 @@ def _grad(self, values):
319346
DY = sp.diags(x, format='csc')
320347
return [DX, DY]
321348

349+
def _hess(self, values):
350+
"""Compute the Hessian of elementwise multiplication w.r.t. each argument.
351+
352+
For z = x * y (elementwise), returns:
353+
- d2z/dx2 = diag(y)
354+
- d2z/dy2 = diag(x)
355+
356+
Args:
357+
values: A list of numeric values for the arguments [x, y].
358+
359+
Returns:
360+
A list of SciPy CSC sparse matrices [D2X, D2Y].
361+
"""
362+
x = values[0]
363+
y = values[1]
364+
# what is the hessian of elementwise multiplication?
365+
# Flatten in case inputs are not 1D
366+
x = np.asarray(x).flatten(order='F')
367+
y = np.asarray(y).flatten(order='F')
368+
D2X = sp.diags(y, format='csc')
369+
D2Y = sp.diags(x, format='csc')
370+
return [D2X, D2Y]
371+
322372
def graph_implementation(
323373
self, arg_objs, shape: Tuple[int, ...], data=None
324374
) -> Tuple[lo.LinOp, List[Constraint]]:

cvxpy/atoms/atom.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -442,12 +442,12 @@ def grad(self):
442442
@property
443443
def hess(self):
444444
from cvxpy.atoms.elementwise.log import log
445-
446-
print("INSIDE HESS IN ATOM")
445+
from cvxpy.atoms.elementwise.power import power
446+
# print("INSIDE HESS IN ATOM")
447447

448448
# Short-circuit to all zeros if known to be constant.
449-
#if self.is_constant():
450-
# return u.grad.constant_grad(self)
449+
if self.is_constant():
450+
return u.grad.constant_grad(self)
451451

452452
# Returns None if variable values not supplied.
453453
arg_values = []
@@ -459,6 +459,9 @@ def hess(self):
459459

460460
# A list of gradients w.r.t. arguments
461461
hess_self = self._hess(arg_values)
462+
if isinstance(hess_self, dict):
463+
# print("hess_self is a dict")
464+
return hess_self
462465
# The Chain rule.
463466
result = {}
464467
for idx, arg in enumerate(self.args):
@@ -467,7 +470,7 @@ def hess(self):
467470
hess_arg = arg.hess
468471

469472
for key in hess_arg:
470-
if isinstance(self, log):
473+
if isinstance(self, (log, power)):
471474
hess_arg[key] = 1.0
472475

473476
# None indicates gradient is not defined.

cvxpy/atoms/elementwise/power.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,35 @@ def _grad(self, values):
380380
grad_vals = float(p)*np.power(values[0], float(p)-1)
381381
return [power.elemwise_grad_to_diag(grad_vals, rows, cols)]
382382

383+
def _hess(self, values):
    """Gives the elementwise second derivative of x**p at ``values``.

    Mirrors ``_grad``: the per-element values p*(p-1)*x**(p-2) are
    packed into a diagonal sparse matrix via
    ``power.elemwise_grad_to_diag``.

    Args:
        values: A list of numeric values for the argument.

    Returns:
        A list containing one sparse matrix, or [None] if the point is
        outside the domain and the power is non-differentiable there.

    Raises:
        ValueError: If the power is parametrized and the parameter
            value is unspecified.
    """
    rows = self.args[0].size
    cols = self.size

    if self.p_rational is not None:
        p = self.p_rational
    elif self.p.value is not None:
        p = self.p.value
    else:
        raise ValueError("Cannot compute hess of parametrized power when "
                         "parameter value is unspecified.")

    if p == 0:
        # x**0 is constant, so the second derivative is identically zero.
        return [sp.csc_array((rows, cols), dtype='float64')]
    # Outside domain or on boundary.
    if not is_power2(p) and np.min(values[0]) <= 0:
        if p < 1:
            # Non-differentiable.
            return [None]
        else:
            # Round up to zero.
            values[0] = np.maximum(values[0], 0)

    hess_vals = float(p)*float(p-1)*np.power(values[0], float(p)-2)
    return [power.elemwise_grad_to_diag(hess_vals, rows, cols)]
411+
383412
def _domain(self) -> List[Constraint]:
384413
"""Returns constraints describing the domain of the node.
385414
"""

cvxpy/expressions/constants/constant.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,13 @@ def grad(self):
116116
"""
117117
return {}
118118

119+
@property
def hess(self):
    """A constant has no variables to differentiate against, so its
    Hessian is the empty dictionary (mirroring ``grad`` above).
    """
    return {}
125+
119126
@property
120127
def shape(self) -> Tuple[int, ...]:
121128
"""Returns the (row, col) dimensions of the expression.

cvxpy/reductions/solvers/nlp_solvers/ipopt_nlpif.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -364,11 +364,11 @@ def hessian(self, x, duals, obj_factor):
364364
# This is done by looping through each constraint and each
365365
# pair of variables and summing up the hessian contributions.
366366
constr_offset = 0
367-
print("checkpoint 1")
367+
#print("checkpoint 1")
368368
for constraint in self.problem.constraints:
369-
print("checkpoint 2")
369+
#print("checkpoint 2")
370370
hess_dict = constraint.expr.hess
371-
print("checkpoint 3")
371+
#print("checkpoint 3")
372372
# we have to make sure that the dual variables correspond
373373
# to the constraints in the same order
374374
constr_dual = duals[constr_offset:constr_offset + constraint.size]
@@ -380,18 +380,17 @@ def hessian(self, x, duals, obj_factor):
380380
var_hess = hess_dict[(var1, var2)]
381381
if sp.issparse(var_hess):
382382
var_hess = var_hess.toarray()
383-
384383
r1, r2 = var1.size, var2.size
385384
# insert the block in the correct location
386385
hess_lagrangian[row_offset:row_offset+r1,
387386
col_offset:col_offset+r2] += constr_dual * var_hess
388387
col_offset += var2.size
389388
row_offset += var1.size
390389
constr_offset += constraint.size
391-
392-
#print("hess: ", hess)
393-
return hess_lagrangian
394-
390+
# return lower triangular part of the hessian
391+
row, col = self.hessianstructure()
392+
hess_lagrangian = hess_lagrangian[row, col]
393+
return hess_lagrangian
395394

396395
class Bounds():
397396
def __init__(self, problem):

cvxpy/tests/NLP_tests/remove_later.py

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,10 @@
1818
for method in METHODS:
1919
print("Method, n, scale factor: ", method, n, factor)
2020
if method == 1:
21-
sigma = cp.Variable((1, ))
21+
sigma = cp.Variable((1,))
2222
obj = (n / 2) * cp.log(2*np.pi*cp.square(sigma)) + (1 / (2 * cp.square(sigma))) * res
2323
constraints = []
24+
"""
2425
elif method == 2:
2526
sigma2 = cp.Variable((1, ))
2627
obj = (n / 2) * cp.log( 2 * np.pi * sigma2) + (1 / (2 * sigma2)) * res
@@ -39,16 +40,12 @@
3940
sigma = cp.Variable((1, ))
4041
obj = n * cp.log(np.sqrt(2*np.pi)*sigma * -1 * -1 * 2 * 0.5) + (1 / (2 * cp.square(sigma))) * res
4142
constraints = []
42-
43+
"""
4344
problem = cp.Problem(cp.Minimize(obj), constraints)
4445
problem.solve(solver=cp.IPOPT, nlp=True)
45-
46-
47-
reduction = Expr2Smooth(problem)
48-
new_prob, inv_data = reduction.apply(problem)
49-
print(str(new_prob))
50-
51-
46+
#reduction = Expr2Smooth(problem)
47+
#new_prob, inv_data = reduction.apply(problem)
48+
#print(str(new_prob))
5249
print("sigma.value: ", sigma.value)
5350
print("sigma_opt: ", sigma_opt)
54-
assert(np.abs(sigma.value - sigma_opt) / np.max([1, np.abs(sigma_opt)]) <= TOL)
51+
assert(np.abs(sigma.value - sigma_opt) / np.max([1, np.abs(sigma_opt)]) <= TOL)

0 commit comments

Comments
 (0)