
Commit 5382605

merge from master

2 parents: b16501b + 2fe6db1

27 files changed: +1784 -1200 lines

cvxpy/atoms/affine/binary_operators.py

Lines changed: 56 additions & 35 deletions
@@ -201,6 +201,12 @@ def _grad(self, values):
         """Gives the (sub/super)gradient of the atom w.r.t. each argument.
 
         Matrix expressions are vectorized, so the gradient is a matrix.
+        CVXPY convention: grad[i, j] = d(output[j]) / d(input[i])
+        Uses Fortran (column-major) ordering for vectorization.
+
+        For matrix multiplication C = X @ Y:
+        - grad_X = kron(Y, I_m) where m = X.shape[0]
+        - grad_Y = kron(I_n, X).T where n = Y.shape[1] (or 1 for vectors)
 
         Args:
             values: A list of numeric values for the arguments.
@@ -211,29 +217,37 @@ def _grad(self, values):
         if self.args[0].is_constant() or self.args[1].is_constant():
             return super(MulExpression, self)._grad(values)
 
-        # TODO(akshayka): Verify that the following code is correct for
-        # non-affine arguments.
-        X = values[0]
-        Y = values[1]
+        X = np.atleast_2d(values[0])
+        Y = np.atleast_2d(values[1])
 
-        DX_rows = self.args[0].size
-        cols = self.args[0].size
+        # Handle 1D shapes: promote to 2D for consistent Kronecker computation
+        x_shape = self.args[0].shape
+        y_shape = self.args[1].shape
 
         # dot product of two vectors with shape (n,)
-        if len(self.args[0].shape) == 1 and len(self.args[1].shape) == 1:
-            DX = sp.csc_array(Y.reshape(-1, 1))  # y as column vector
-            DY = sp.csc_array(X.reshape(-1, 1))  # x as column vector
+        if len(x_shape) == 1 and len(y_shape) == 1:
+            # For 1D @ 1D -> scalar: grad is simply the other vector
+            DX = sp.csc_array(values[1].reshape(-1, 1))
+            DY = sp.csc_array(values[0].reshape(-1, 1))
            return [DX, DY]
 
-        # DX = [diag(Y11), diag(Y12), ...]
-        #      [diag(Y21), diag(Y22), ...]
-        #      [   ...       ...     ...]
-        DX = sp.dok_array((DX_rows, cols))
-        for k in range(self.args[0].shape[0]):
-            DX[k::self.args[0].shape[0], k::self.args[0].shape[0]] = Y
-        DX = sp.csc_array(DX)
-        cols = 1 if len(self.args[1].shape) == 1 else self.args[1].shape[1]
-        DY = sp.block_diag([np.atleast_2d(X.T) for k in range(cols)], "csc")
+        # For matrix @ vector, Y is (k,) -> treat as (k, 1)
+        # Note: atleast_2d converts (k,) to (1, k), so we transpose to get (k, 1)
+        if len(y_shape) == 1:
+            Y = Y.T  # (1, k) from atleast_2d -> (k, 1)
+
+        # For vector @ matrix, X is (k,) -> treat as (1, k)
+        if len(x_shape) == 1:
+            X = X  # already (1, k) from atleast_2d
+
+        m = X.shape[0]  # rows of X
+        n = Y.shape[1]  # cols of Y
+
+        # grad_X = kron(Y, I_m) with shape (m*k, m*n)
+        DX = sp.kron(Y, sp.eye_array(m), format='csc')
+
+        # grad_Y = kron(I_n, X).T with shape (k*n, m*n)
+        DY = sp.kron(sp.eye_array(n), X, format='csc').T
 
         return [DX, DY]
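A quick standalone sanity check of the Kronecker identities used above (an illustrative sketch, not part of the commit; assumes NumPy plus SciPy >= 1.12 for sp.eye_array): for C = X @ Y we have vec_F(C) = kron(Y.T, I_m) @ vec_F(X) = kron(I_n, X) @ vec_F(Y), and transposing these Jacobians gives exactly grad_X = kron(Y, I_m) and grad_Y = kron(I_n, X).T.

import numpy as np
import scipy.sparse as sp

rng = np.random.default_rng(0)
m, k, n = 3, 4, 2
X = rng.standard_normal((m, k))
Y = rng.standard_normal((k, n))

DX = sp.kron(Y, sp.eye_array(m), format='csc')    # shape (m*k, m*n)
DY = sp.kron(sp.eye_array(n), X, format='csc').T  # shape (k*n, m*n)

# grad.T maps an F-order-vectorized input perturbation to the output one
dX = rng.standard_normal((m, k))
assert np.allclose((dX @ Y).flatten(order='F'), DX.T @ dX.flatten(order='F'))

dY = rng.standard_normal((k, n))
assert np.allclose((X @ dY).flatten(order='F'), DY.T @ dY.flatten(order='F'))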
@@ -461,27 +475,34 @@ def is_nsd(self) -> bool:
             (self.args[0].is_nsd() and self.args[1].is_psd())
 
     def _grad(self, values):
-        """Compute the gradient of elementwise multiplication w.r.t. each argument.
-
-        For z = x * y (elementwise), returns:
-        - dz/dx = diag(y)
-        - dz/dy = diag(x)
-
+        """Gives the (sub/super)gradient of elementwise multiply.
+
+        For z = multiply(x, y), we have z[i] = x[i] * y[i].
+        Gradient is diagonal: grad_x = diag(y), grad_y = diag(x).
+        CVXPY convention: grad[i, j] = d(output[j]) / d(input[i])
+
         Args:
-            values: A list of numeric values for the arguments [x, y].
-
+            values: A list of numeric values for the arguments.
+
         Returns:
-            A list of SciPy CSC sparse matrices [DX, DY].
+            A list of SciPy CSC sparse matrices or None.
         """
-        x = values[0]
-        y = values[1]
-        # Flatten in case inputs are not 1D
-        x = np.asarray(x).flatten(order='F')
-        y = np.asarray(y).flatten(order='F')
-        DX = sp.diags(y, format='csc')
-        DY = sp.diags(x, format='csc')
+        if self.args[0].is_constant() or self.args[1].is_constant():
+            return super(multiply, self)._grad(values)
+
+        X = values[0]
+        Y = values[1]
+
+        # Flatten in F-order for CVXPY convention
+        x_flat = np.asarray(X).flatten(order='F')
+        y_flat = np.asarray(Y).flatten(order='F')
+
+        # Gradient is diagonal: grad_x[i, i] = y[i], grad_y[i, i] = x[i]
+        DX = sp.diags(y_flat, format='csc')
+        DY = sp.diags(x_flat, format='csc')
+
         return [DX, DY]
-
+
     def _verify_hess_vec_args(self):
         x = self.args[0]
         y = self.args[1]

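The diagonal rule above can be checked the same way (illustrative sketch, not part of the commit): for z = multiply(X, Y), perturbing X by dX perturbs z by dX * Y, which under F-order vectorization is exactly diag(vec_F(Y)) applied to vec_F(dX).

import numpy as np
import scipy.sparse as sp

rng = np.random.default_rng(1)
X = rng.standard_normal((3, 2))
Y = rng.standard_normal((3, 2))

DX = sp.diags(Y.flatten(order='F'), format='csc')  # grad_x = diag(vec_F(Y))
dX = rng.standard_normal((3, 2))
assert np.allclose((dX * Y).flatten(order='F'), DX.T @ dX.flatten(order='F'))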
cvxpy/atoms/affine/cumsum.py

Lines changed: 70 additions & 74 deletions
@@ -13,40 +13,30 @@
 See the License for the specific language governing permissions and
 limitations under the License.
 """
-from typing import List, Tuple
+import warnings
+from typing import Optional, Tuple
 
 import numpy as np
 import scipy.sparse as sp
+from numpy.lib.array_utils import normalize_axis_index
 
-import cvxpy.lin_ops.lin_op as lo
-import cvxpy.lin_ops.lin_utils as lu
 from cvxpy.atoms.affine.affine_atom import AffAtom
-from cvxpy.atoms.affine.binary_operators import MulExpression
 from cvxpy.atoms.axis_atom import AxisAtom
-from cvxpy.constraints.constraint import Constraint
 from cvxpy.expressions.expression import Expression
-from cvxpy.expressions.variable import Variable
 
 
-def get_diff_mat(dim: int, axis: int) -> sp.csc_array:
-    """Return a sparse matrix representation of first order difference operator.
+def _sparse_triu_ones(dim: int) -> sp.csc_array:
+    """Create a sparse upper triangular matrix of ones.
 
-    Parameters
-    ----------
-    dim : int
-        The length of the matrix dimensions.
-    axis : int
-        The axis to take the difference along.
-
-    Returns
-    -------
-    sp.csc_array
-        A square matrix representing first order difference.
+    This avoids allocating a dense dim x dim matrix.
+    Used for cumsum gradient in CVXPY's convention: grad[i,j] = d(out[j])/d(in[i]).
     """
-    mat = sp.diags_array([np.ones(dim), -np.ones(dim - 1)], offsets=[0, -1],
-                         shape=(dim, dim),
-                         format='csc')
-    return mat if axis == 0 else mat.T
+    # Row i has entries at columns i, i+1, ..., dim-1
+    # So row 0 has dim entries, row 1 has dim-1, etc.
+    rows = np.repeat(np.arange(dim), np.arange(dim, 0, -1))
+    cols = np.concatenate([np.arange(i, dim) for i in range(dim)])
+    data = np.ones(len(rows))
+    return sp.csc_array((data, (rows, cols)), shape=(dim, dim))
 
 
 class cumsum(AffAtom, AxisAtom):
@@ -57,82 +47,88 @@ class cumsum(AffAtom, AxisAtom):
     ----------
     expr : CVXPY expression
         The expression being summed.
-    axis : int
-        The axis to sum across if 2D.
+    axis : int, optional
+        The axis to sum across. If None, the array is flattened before cumsum.
+        Note: NumPy's default is axis=None, while CVXPY defaults to axis=0.
     """
-    def __init__(self, expr: Expression, axis: int = 0) -> None:
+    def __init__(self, expr: Expression, axis: Optional[int] = 0) -> None:
         super(cumsum, self).__init__(expr, axis)
 
+    def validate_arguments(self) -> None:
+        """Validate axis, but handle 0D arrays specially."""
+        if self.args[0].ndim == 0:
+            if self.axis is not None:
+                warnings.warn(
+                    "cumsum on 0-dimensional arrays currently returns a scalar, "
+                    "but in a future CVXPY version it will return a 1-element "
+                    "array to match numpy.cumsum behavior. Additionally, only "
+                    "axis=0, axis=-1, or axis=None will be valid for 0D arrays.",
+                    FutureWarning
+                )
+        else:
+            super().validate_arguments()
+
     @AffAtom.numpy_numeric
     def numeric(self, values):
         """
         Returns the cumulative sum of elements of an expression over an axis.
         """
         return np.cumsum(values[0], axis=self.axis)
 
-    def validate_arguments(self):
-        if self.args[0].ndim > 2:
-            raise UserWarning(
-                "cumsum is only implemented for 1D or 2D arrays and might not "
-                "work as expected for higher dimensions."
-            )
-
     def shape_from_args(self) -> Tuple[int, ...]:
-        """The same as the input."""
+        """Flattened if axis=None, otherwise same as input."""
+        if self.axis is None:
+            return (self.args[0].size,)
         return self.args[0].shape
 
     def _grad(self, values):
         """Gives the (sub/super)gradient of the atom w.r.t. each argument.
 
         Matrix expressions are vectorized, so the gradient is a matrix.
+        CVXPY convention: grad[i, j] = d(output[j]) / d(input[i]).
 
         Args:
             values: A list of numeric values for the arguments.
 
         Returns:
             A list of SciPy CSC sparse matrices or None.
         """
-        dim = values[0].shape[self.axis]
-        mat = sp.csc_array(np.tril(np.ones((dim, dim))))
-        var = Variable(self.args[0].shape)
-        if self.axis == 0:
-            grad = MulExpression(mat, var)._grad(values)[1]
-        else:
-            grad = MulExpression(var, mat.T)._grad(values)[0]
-        return [grad]
+        ndim = len(values[0].shape)
+        axis = self.axis
+
+        # Handle axis=None: treat as 1D cumsum over C-order flattened array
+        if axis is None:
+            dim = values[0].size
+            # For cumsum with axis=None:
+            # - Input x is vectorized in F-order (CVXPY convention)
+            # - cumsum flattens in C-order then computes cumsum
+            # - Let x_f = F-order input, x_c = C-order = P @ x_f
+            # - y = L @ x_c = L @ P @ x_f (L is lower triangular)
+            # - dy/dx_f = L @ P
+            # - CVXPY wants grad[i,j] = dy[j]/dx_f[i] = (L @ P).T = P.T @ L.T = P.T @ U
+            #   where U is upper triangular
+            triu = _sparse_triu_ones(dim)
+            # Permutation: P @ f_vec = c_vec
+            c_order_indices = np.arange(dim).reshape(values[0].shape, order='F').flatten(order='C')
+            P = sp.csc_array((np.ones(dim), (np.arange(dim), c_order_indices)), shape=(dim, dim))
+            grad = P.T @ triu
+            return [sp.csc_array(grad)]
+
+        axis = normalize_axis_index(axis, ndim)
+        dim = values[0].shape[axis]
+
+        # Upper triangular matrix for CVXPY gradient convention
+        # grad[i, j] = d(cumsum[j])/d(x[i]) = 1 if i <= j
+        triu = _sparse_triu_ones(dim)
+
+        # Kronecker product: I_post ⊗ triu ⊗ I_pre
+        # This works for all dimensions including 1D and 2D
+        pre_size = int(np.prod(values[0].shape[:axis])) if axis > 0 else 1
+        post_size = int(np.prod(values[0].shape[axis+1:])) if axis < ndim - 1 else 1
+
+        grad = sp.kron(sp.kron(sp.eye_array(post_size), triu), sp.eye_array(pre_size))
+        return [sp.csc_array(grad)]
 
     def get_data(self):
         """Returns the axis being summed."""
         return [self.axis]
-
-    def graph_implementation(
-        self, arg_objs, shape: Tuple[int, ...], data=None
-    ) -> Tuple[lo.LinOp, List[Constraint]]:
-        """Cumulative sum via difference matrix.
-
-        Parameters
-        ----------
-        arg_objs : list
-            LinExpr for each argument.
-        shape : tuple
-            The shape of the resulting expression.
-        data :
-            Additional data required by the atom.
-
-        Returns
-        -------
-        tuple
-            (LinOp for objective, list of constraints)
-        """
-        # Implicit O(n) definition:
-        # X = Y[1:,:] - Y[:-1, :]
-        Y = lu.create_var(shape)
-        axis = data[0]
-        dim = shape[axis]
-        diff_mat = get_diff_mat(dim, axis)
-        diff_mat = lu.create_const(diff_mat, (dim, dim), sparse=True)
-        if axis == 0:
-            diff = lu.mul_expr(diff_mat, Y)
-        else:
-            diff = lu.rmul_expr(Y, diff_mat)
-        return (Y, [lu.create_eq(arg_objs[0], diff)])

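Both branches of the new cumsum gradient can be verified against numpy.cumsum in isolation (illustrative sketch, not part of the commit; sparse_triu_ones mirrors the helper added above, and SciPy >= 1.12 is assumed for sp.eye_array):

import numpy as np
import scipy.sparse as sp

def sparse_triu_ones(dim):
    rows = np.repeat(np.arange(dim), np.arange(dim, 0, -1))
    cols = np.concatenate([np.arange(i, dim) for i in range(dim)])
    return sp.csc_array((np.ones(len(rows)), (rows, cols)), shape=(dim, dim))

rng = np.random.default_rng(2)
x = rng.standard_normal((3, 4))

# Axis case: grad = kron(kron(I_post, triu), I_pre), and grad.T maps
# vec_F(x) to vec_F(cumsum(x, axis))
axis = 1
triu = sparse_triu_ones(x.shape[axis])
pre = int(np.prod(x.shape[:axis]))
post = int(np.prod(x.shape[axis + 1:]))
grad = sp.kron(sp.kron(sp.eye_array(post), triu), sp.eye_array(pre))
assert np.allclose(np.cumsum(x, axis=axis).flatten(order='F'),
                   grad.T @ x.flatten(order='F'))

# axis=None case: numpy flattens in C-order while CVXPY vectorizes in
# F-order, hence the permutation P with x_c = P @ x_f
dim = x.size
c_idx = np.arange(dim).reshape(x.shape, order='F').flatten(order='C')
P = sp.csc_array((np.ones(dim), (np.arange(dim), c_idx)), shape=(dim, dim))
grad_none = P.T @ sparse_triu_ones(dim)
assert np.allclose(np.cumsum(x), grad_none.T @ x.flatten(order='F'))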
cvxpy/atoms/affine/diff.py

Lines changed: 42 additions & 20 deletions
@@ -14,38 +14,60 @@
 limitations under the License.
 """
 
+from numpy.lib.array_utils import normalize_axis_index
+
 from cvxpy.expressions.expression import Expression
 
 
 def diff(x, k: int = 1, axis: int = 0):
-    """Vector of kth order differences.
+    """Computes kth order differences along the specified axis.
+
+    Takes in an array and returns an array with the kth order differences
+    along the given axis. The output shape is the same as the input except
+    the size along the specified axis is reduced by k.
 
-    Takes in a vector of length n and returns a vector
-    of length n-k of the kth order differences.
+    diff(x) returns the differences between adjacent elements along axis 0:
+    [x[1] - x[0], x[2] - x[1], ...]
 
-    diff(x) returns the vector of differences between
-    adjacent elements in the vector, that is
+    diff(x, 2) is the second-order differences, equivalently diff(diff(x))
 
-    [x[2] - x[1], x[3] - x[2], ...]
+    diff(x, 0) returns the array x unchanged
 
-    diff(x, 2) is the second-order differences vector,
-    equivalently diff(diff(x))
+    Parameters
+    ----------
+    x : Expression or array-like
+        Input array.
+    k : int, optional
+        The number of times values are differenced. Default is 1.
+    axis : int, optional
+        The axis along which the difference is taken. Default is 0.
+        Note: NumPy's np.diff uses axis=-1 as default.
 
-    diff(x, 0) returns the vector x unchanged
+    Returns
+    -------
+    Expression
+        The kth order differences along the specified axis.
     """
     x = Expression.cast_to_const(x)
-    if (axis == 1 and x.ndim < 2) or x.ndim == 0:
+
+    # Validate and normalize axis (handles negative indices)
+    if x.ndim == 0:
         raise ValueError("Invalid axis given input dimensions.")
-    elif axis == 1:
-        x = x.T
+    axis = normalize_axis_index(axis, x.ndim)
 
-    # Always test shape[0] because if axis == 1 x is transposed.
-    if k < 0 or k >= x.shape[0]:
+    # Validate k
+    if k < 0 or k >= x.shape[axis]:
         raise ValueError("Must have k >= 0 and X must have < k elements along "
                          "axis")
-    for i in range(k):
-        if x.ndim == 2:
-            x = x[1:, :] - x[:-1, :]
-        else:
-            x = x[1:] - x[:-1]
-    return x.T if axis == 1 else x
+
+    # Apply k iterations of first-order difference along axis
+    for _ in range(k):
+        slices_upper = [slice(None)] * x.ndim
+        slices_upper[axis] = slice(1, None)
+
+        slices_lower = [slice(None)] * x.ndim
+        slices_lower[axis] = slice(None, -1)
+
+        x = x[tuple(slices_upper)] - x[tuple(slices_lower)]
+
+    return x

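The slice-tuple pattern used above generalizes np.diff-style differencing to any axis without transposing; a standalone NumPy sketch follows (diff_along_axis is an illustrative name, not part of the commit):

import numpy as np

def diff_along_axis(x, k=1, axis=0):
    axis = axis % x.ndim  # normalize negative axes (normalize_axis_index also validates bounds)
    for _ in range(k):
        upper = [slice(None)] * x.ndim
        upper[axis] = slice(1, None)     # selects x[..., 1:, ...]
        lower = [slice(None)] * x.ndim
        lower[axis] = slice(None, -1)    # selects x[..., :-1, ...]
        x = x[tuple(upper)] - x[tuple(lower)]
    return x

a = np.arange(12.0).reshape(3, 4)
assert np.array_equal(diff_along_axis(a, k=1, axis=1), np.diff(a, n=1, axis=1))
assert np.array_equal(diff_along_axis(a, k=2, axis=0), np.diff(a, n=2, axis=0))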