Commit 6c54fe5

Merge branch 'master' into bounds_investigation
2 parents 8f24df7 + 115bf77 commit 6c54fe5

File tree

8 files changed: +121 -49 lines changed

cvxpy/atoms/affine/affine_atom.py

Lines changed: 33 additions & 37 deletions
@@ -109,58 +109,54 @@ def is_nsd(self) -> bool:
         return True
 
     def _grad(self, values) -> List[Any]:
-        """Gives the (sub/super)gradient of the atom w.r.t. each argument.
+        """Computes the gradient of the affine atom w.r.t. each argument.
 
-        Matrix expressions are vectorized, so the gradient is a matrix.
+        For affine atoms, the gradient is constant and independent of argument values.
+        We compute it by constructing the canonical matrix representation and extracting
+        the linear coefficients.
 
         Args:
-            values: A list of numeric values for the arguments.
+            values: Argument values (unused for affine atoms).
 
         Returns:
-            A list of SciPy CSC sparse matrices or None.
+            List of gradient matrices, one for each argument.
         """
-        # TODO should be a simple function in cvxcore for this.
-        # Make a fake lin op tree for the function.
+        # Create fake variables for each non-constant argument to build the linear system
         fake_args = []
         var_offsets = {}
-        offset = 0
+        var_length = 0
+
         for idx, arg in enumerate(self.args):
             if arg.is_constant():
-                fake_args += [Constant(arg.value).canonical_form[0]]
+                fake_args.append(Constant(arg.value).canonical_form[0])
             else:
-                fake_args += [lu.create_var(arg.shape, idx)]
-                var_offsets[idx] = offset
-                offset += arg.size
-        var_length = offset
-        fake_expr, _ = self.graph_implementation(fake_args, self.shape,
-                                                 self.get_data())
-        param_to_size = {lo.CONSTANT_ID: 1}
-        param_to_col = {lo.CONSTANT_ID: 0}
-        # Get the matrix representation of the function.
+                fake_args.append(lu.create_var(arg.shape, idx))
+                var_offsets[idx] = var_length
+                var_length += arg.size
+
+        # Get the canonical matrix representation: f(x) = Ax + b
+        fake_expr, _ = self.graph_implementation(fake_args, self.shape, self.get_data())
         canon_mat = canonInterface.get_problem_matrix(
-            [fake_expr],
-            var_length,
-            var_offsets,
-            param_to_size,
-            param_to_col,
-            self.size,
+            [fake_expr], var_length, var_offsets,
+            {lo.CONSTANT_ID: 1}, {lo.CONSTANT_ID: 0}, self.size
         )
-        # HACK TODO TODO convert tensors back to vectors.
-        # COO = (V[lo.CONSTANT_ID][0], (J[lo.CONSTANT_ID][0], I[lo.CONSTANT_ID][0]))
-        shape = (var_length + 1, self.size)
-        stacked_grad = canon_mat.reshape(shape).tocsc()[:-1, :]
-        # Break up into per argument matrices.
+
+        # Extract gradient matrix A (exclude constant offset b)
+        grad_matrix = canon_mat.reshape((var_length + 1, self.size)).tocsc()[:-1, :]
+
+        # Split gradients by argument
         grad_list = []
-        start = 0
+        var_start = 0
         for arg in self.args:
             if arg.is_constant():
-                grad_shape = (arg.size, shape[1])
-                if grad_shape == (1, 1):
-                    grad_list += [0]
-                else:
-                    grad_list += [sp.coo_matrix(grad_shape, dtype='float64')]
+                # Zero gradient for constants
+                grad_shape = (arg.size, self.size)
+                grad_list.append(0 if grad_shape == (1, 1) else
+                                 sp.coo_matrix(grad_shape, dtype='float64'))
             else:
-                stop = start + arg.size
-                grad_list += [stacked_grad[start:stop, :]]
-                start = stop
+                # Extract gradient block for this variable
+                var_end = var_start + arg.size
+                grad_list.append(grad_matrix[var_start:var_end, :])
+                var_start = var_end
+
         return grad_list
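As a quick illustration of the new docstring's claim (a sketch, assuming a standard cvxpy install): the gradient of an affine expression f(x) = A @ x is the constant matrix A, transposed under cvxpy's vectorized convention, regardless of the value of x.

import numpy as np
import cvxpy as cp

A = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
x = cp.Variable(2)
expr = A @ x

x.value = np.array([1.0, -1.0])
g1 = expr.grad[x].toarray()   # gradient at one point
x.value = np.array([10.0, 3.0])
g2 = expr.grad[x].toarray()   # gradient at another point

assert np.allclose(g1, g2)    # constant, independent of x
assert np.allclose(g1, A.T)   # the linear coefficients of f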

cvxpy/atoms/atom.py

Lines changed: 10 additions & 0 deletions
@@ -439,6 +439,16 @@ def grad(self):
 
         return result
 
+    def hess(self, duals):
+        """
+        Compute the hessian-vector product of a scalar function with dual
+        values.
+        Here the dual values should correspond to the dual values of the
+        constraint function, or None if we are taking the hessian of the objective.
+        We must first slice the duals array to get the relevant values.
+        """
+        pass
+
     @abc.abstractmethod
     def _grad(self, values):
         """Gives the (sub/super)gradient of the atom w.r.t. each argument.

cvxpy/reductions/expr2smooth/canonicalizers/log_canon.py

Lines changed: 18 additions & 0 deletions
@@ -14,6 +14,9 @@
 limitations under the License.
 """
 
+import numpy as np
+
+from cvxpy.expressions.constants import Constant
 from cvxpy.expressions.variable import Variable
 from cvxpy.expressions.constants import Constant
 import numpy as np
@@ -34,6 +37,21 @@ def collect_constant_and_variable(expr, constants, variable):
 # Perhaps this should be a parameter exposed to the user?
 LOWER_BOUND = 1e-5
 
+def collect_constant_and_variable(expr, constants, variable):
+    if isinstance(expr, Constant):
+        constants.append(expr)
+    elif isinstance(expr, Variable):
+        variable.append(expr)
+    elif hasattr(expr, "args"):
+        for subexpr in expr.args:
+            collect_constant_and_variable(subexpr, constants, variable)
+
+    assert(len(variable) <= 1)
+
+# DCED: Without this lower bound the stress test for ML Gaussian non-zero mean fails.
+# Perhaps this should be a parameter exposed to the user?
+LOWER_BOUND = 1e-5
+
 def log_canon(expr, args):
     t = Variable(args[0].size, bounds=[LOWER_BOUND, None])
 
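A small usage sketch of the helper added above (assuming a standard cvxpy install and that collect_constant_and_variable is importable from this module): it recursively walks the expression tree, separating Constant leaves from the single Variable leaf.

import cvxpy as cp

x = cp.Variable(3)
expr = 2.0 * x + cp.Constant([1.0, 0.0, -1.0])

constants, variables = [], []
collect_constant_and_variable(expr, constants, variables)
print(len(constants), len(variables))  # the constant leaves and the one variable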
cvxpy/reductions/expr2smooth/canonicalizers/power_canon.py

Lines changed: 4 additions & 2 deletions
@@ -42,11 +42,13 @@ def power_canon(expr, args):
         #return t, [t**(1/p) == x, t >= 0]
     elif p > 1:
         t = Variable(args[0].shape)
-        if args[0].value is not None:
+
+        # DCED: for Gaussian ML it works better if we include the second
+        # condition here
+        if args[0].value is not None and np.all(args[0].value >= 1):
             t.value = args[0].value
         else:
             t.value = expr.point_in_domain()
-            #t.value = np.zeros(shape)
 
         return expr.copy([t]), [t==args[0]]
     else:
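The guard added above reuses the argument's value as a starting point only when every entry is at least 1, falling back to a point in the domain otherwise. Here is a standalone sketch of that rule (init_power_var is a hypothetical helper mirroring the logic, not part of the commit):

import numpy as np

def init_power_var(arg_value, fallback):
    # Pick a starting value for t in the power canonicalization.
    if arg_value is not None and np.all(arg_value >= 1):
        return arg_value
    return fallback

print(init_power_var(np.array([2.0, 3.0]), np.ones(2)))  # reuses the argument value
print(init_power_var(np.array([0.5, 3.0]), np.ones(2)))  # falls back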

cvxpy/reductions/solvers/nlp_solvers/ipopt_nlpif.py

Lines changed: 36 additions & 8 deletions
@@ -213,7 +213,6 @@ def __init__(self, problem, inital_point):
 
     def objective(self, x):
         """Returns the scalar value of the objective given x."""
-        # Set the variable value
         offset = 0
         for var in self.main_var:
             size = var.size
@@ -227,7 +226,6 @@ def gradient(self, x):
         #import pdb
         #pdb.set_trace()
         """Returns the gradient of the objective with respect to x."""
-        # compute the gradient using _grad
         offset = 0
         for var in self.main_var:
             size = var.size
@@ -248,15 +246,13 @@ def gradient(self, x):
 
     def constraints(self, x):
         """Returns the constraint values."""
-        # Set the variable value
         offset = 0
         #import pdb
         #pdb.set_trace()
         for var in self.main_var:
             size = var.size
             var.value = x[offset:offset+size].reshape(var.shape, order='F')
             offset += size
-
         # Evaluate all constraints
         constraint_values = []
         for constraint in self.problem.constraints:
@@ -265,15 +261,12 @@ def constraints(self, x):
 
     def jacobian(self, x):
         """Returns only the non-zero values of the Jacobian."""
-        #import pdb
-        #pdb.set_trace()
         # Set variable values
         offset = 0
         for var in self.main_var:
             size = var.size
             var.value = x[offset:offset+size].reshape(var.shape, order='F')
             offset += size
-
         values = []
         for constraint in self.problem.constraints:
             # get the jacobian of the constraint
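The methods above all share the same unpacking pattern, shown standalone here (a sketch with hypothetical shapes): IPOPT hands back a flat iterate x, which is scattered into each variable's matrix value in Fortran (column-major) order.

import numpy as np

shapes = [(2, 2), (3,)]        # hypothetical variable shapes
x = np.arange(7.0)             # flat iterate, total size 4 + 3
offset = 0
for shape in shapes:
    size = int(np.prod(shape))
    value = x[offset:offset + size].reshape(shape, order='F')
    offset += size
    print(value)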
@@ -329,9 +322,44 @@ def jacobianstructure(self):
                 col_offset += var.size
             row_offset += constraint.size
             self.jacobian_idxs[constraint] = constraint_jac
-
         return (np.array(rows), np.array(cols))
 
+    def hessian(self, x, duals, obj_factor):
+        # Scatter the flat iterate x into the variables' values
+        offset = 0
+        for var in self.main_var:
+            size = var.size
+            var.value = x[offset:offset+size].reshape(var.shape, order='F')
+            offset += size
+        hess = np.zeros((x.size, x.size), dtype=np.float64)
+        # hess_dict = self.problem.objective.expr.hess(obj_factor)
+        # If we specify the problem in graph form (i.e. t=obj),
+        # the objective hessian will always be zero.
+        # To compute the hessian of the constraints, we need to gather the
+        # second derivatives from each constraint.
+        # This is done by looping through each constraint and each
+        # pair of variables and summing up the hessian contributions.
+        constr_offset = 0
+        for constraint in self.problem.constraints:
+            constr_hess = constraint.expr.hess()
+            # we have to make sure that the dual variables correspond
+            # to the constraints in the same order
+            constr_dual = duals[constr_offset:constr_offset + constraint.size]
+            row_offset = 0
+            for var1 in self.main_var:
+                col_offset = 0
+                for var2 in self.main_var:
+                    block = constr_hess.get((var1, var2))
+                    if block is not None:
+                        # add this (var1, var2) block at its offset in the dense hessian
+                        hess[row_offset:row_offset + var1.size,
+                             col_offset:col_offset + var2.size] += (
+                            constr_dual * block.toarray()
+                        )
+                    col_offset += var2.size
+                row_offset += var1.size
+            constr_offset += constraint.size
+        return hess
+
+    def hessianstructure(self):
+        pass
+
+
 class Bounds():
     def __init__(self, problem):
         self.problem = problem
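To see the block-assembly logic of hessian() in isolation, here is a self-contained toy (hypothetical sizes and blocks, mirroring the loop above): per-variable-pair Hessian blocks are weighted by a constraint's dual value and scattered into the dense Hessian at running row/column offsets.

import numpy as np
import scipy.sparse as sp

var_sizes = [2, 3]                          # two variables of size 2 and 3
n = sum(var_sizes)
hess = np.zeros((n, n))
blocks = {(0, 0): 4.0 * sp.eye(2),          # (var1, var2) -> Hessian block
          (1, 1): 6.0 * sp.eye(3)}
dual = 0.5                                  # dual value of one scalar constraint

row = 0
for i, si in enumerate(var_sizes):
    col = 0
    for j, sj in enumerate(var_sizes):
        block = blocks.get((i, j))
        if block is not None:
            hess[row:row + si, col:col + sj] += dual * block.toarray()
        col += sj
    row += si
print(hess)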

cvxpy/sandbox/nmf_fro.py

Lines changed: 20 additions & 0 deletions
@@ -0,0 +1,20 @@
+import numpy as np
+
+import cvxpy as cp
+
+np.random.seed(1)
+
+n, m, k = 5, 4, 2
+noise_level = 0.05
+X_true = np.random.rand(n, k)
+Y_true = np.random.rand(k, m)
+A_noisy = X_true @ Y_true + noise_level * np.random.randn(n, m)
+A_noise = np.clip(A_noisy, 0, None)
+# initialize X and Y to nonnegative starting values (all ones)
+X = cp.Variable((n, k), bounds=[0, np.inf])
+X.value = np.ones((n, k))
+Y = cp.Variable((k, m), bounds=[0, np.inf])
+Y.value = np.ones((k, m))
+obj = cp.sum(cp.square(A_noise - X @ Y))
+prob = cp.Problem(cp.Minimize(obj))
+prob.solve(solver=cp.IPOPT, nlp=True, verbose=True)
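A possible follow-up check after the solve (a sketch; assumes the solve above succeeded and uses only names defined in the script):

print("objective:", prob.value)
print("Frobenius residual:", np.linalg.norm(A_noise - X.value @ Y.value, 'fro'))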

cvxpy/sandbox/tests_problem_references/NMF_fro.py

Lines changed: 0 additions & 1 deletion
@@ -19,4 +19,3 @@
 obj = cp.sum(cp.square(A_noisy - X @ Y))
 problem = cp.Problem(cp.Minimize(obj))
 problem.solve(solver=cp.IPOPT, nlp=True, verbose=True)
-

cvxpy/tests/NLP_tests/test_ML_Gaussian_stress.py

Lines changed: 0 additions & 1 deletion
@@ -4,7 +4,6 @@
 
 # TODO (DCED): should try eg. student-t regression
 
-
 class TestStressMLE():
 
     def test_zero_mean(self):
