cvxgrp
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
Lines changed: 1 addition & 1 deletion
diff --git a/cvxpy/atoms/affine/cumsum.py b/cvxpy/atoms/affine/cumsum.py
Lines changed: 5 additions & 2 deletions
diff --git a/cvxpy/atoms/affine/diag.py b/cvxpy/atoms/affine/diag.py
Lines changed: 1 addition & 1 deletion
diff --git a/cvxpy/atoms/affine/imag.py b/cvxpy/atoms/affine/imag.py
Lines changed: 1 addition & 1 deletion
diff --git a/cvxpy/atoms/affine/real.py b/cvxpy/atoms/affine/real.py
Lines changed: 1 addition & 1 deletion
diff --git a/cvxpy/atoms/affine/sum.py b/cvxpy/atoms/affine/sum.py
Lines changed: 7 additions & 3 deletions
diff --git a/cvxpy/atoms/affine/trace.py b/cvxpy/atoms/affine/trace.py
Lines changed: 12 additions & 7 deletions
diff --git a/cvxpy/atoms/affine/upper_tri.py b/cvxpy/atoms/affine/upper_tri.py
Lines changed: 41 additions & 9 deletions
diff --git a/cvxpy/atoms/axis_atom.py b/cvxpy/atoms/axis_atom.py
Lines changed: 105 additions & 30 deletions
diff --git a/cvxpy/atoms/cummax.py b/cvxpy/atoms/cummax.py
Lines changed: 2 additions & 0 deletions
@@ -159,7 +159,7 @@ jobs:
           CIBW_SKIP: "*-win32 *-manylinux_i686 *-musllinux*"
           CIBW_ARCHS_MACOS: x86_64 universal2
           CIBW_ARCHS_LINUX: auto aarch64
-        uses: pypa/cibuildwheel@v3.3.0
+        uses: pypa/cibuildwheel@v3.4.0
 
       - name: Build source
         if: ${{github.event_name == 'push' && env.SINGLE_ACTION_CONFIG == 'True'}}
 
@@ -13,7 +13,7 @@
 See the License for the specific language governing permissions and
 limitations under the License.
 """
-from typing import Optional, Tuple
+from typing import Tuple
 
 import numpy as np
 import scipy.sparse as sp
@@ -51,7 +51,10 @@ class cumsum(AffAtom, AxisAtom):
         The axis to sum across. If None, the array is flattened before cumsum.
         Note: NumPy's default is axis=None, while CVXPY defaults to axis=0.
     """
-    def __init__(self, expr: Expression, axis: Optional[int] = 0) -> None:
+
+    _reduce_all_axes_to_none = False
+
+    def __init__(self, expr: Expression, axis: None | int = 0) -> None:
         super(cumsum, self).__init__(expr, axis)
 
     def validate_arguments(self) -> None:
 
@@ -101,7 +101,7 @@ def is_symmetric(self) -> bool:
     def is_hermitian(self) -> bool:
         """Is the expression hermitian?
         """
-        return self.k == 0
+        return self.k == 0 and self.args[0].is_real()
 
     def is_psd(self) -> bool:
         """Is the expression a positive semidefinite matrix?
 
@@ -49,4 +49,4 @@ def is_complex(self) -> bool:
     def is_symmetric(self) -> bool:
         """Is the expression symmetric?
         """
-        return self.args[0].is_hermitian()
+        return self.args[0].is_symmetric()
@@ -50,4 +50,4 @@ def is_complex(self) -> bool:
     def is_symmetric(self) -> bool:
         """Is the expression symmetric?
         """
-        return self.args[0].is_hermitian()
+        return self.args[0].is_symmetric() or self.args[0].is_hermitian()
@@ -16,7 +16,7 @@
 import builtins
 from functools import wraps
 from types import GeneratorType
-from typing import Optional, Tuple
+from typing import Tuple
 
 import numpy as np
 from numpy.exceptions import AxisError
@@ -25,7 +25,7 @@
 import cvxpy.lin_ops.lin_op as lo
 import cvxpy.lin_ops.lin_utils as lu
 from cvxpy.atoms.affine.affine_atom import AffAtom
-from cvxpy.atoms.axis_atom import AxisAtom
+from cvxpy.atoms.axis_atom import AxisAtom, normalize_axis
 from cvxpy.constraints.constraint import Constraint
 from cvxpy.utilities import bounds as bounds_utils
 
@@ -123,6 +123,10 @@ def graph_implementation(self,
             The axis and keepdims parameters of the sum expression.
         """
         axis, keepdims = data
+        # Normalize tuple axes so they use the fast path when possible.
+        if isinstance(axis, tuple):
+            ndim = len(arg_objs[0].shape)
+            axis = normalize_axis(axis, ndim)
         # Note: added new case for summing with n-dimensional shapes and
         # multiple axes. Previous behavior is kept in the else statement.
         if len(arg_objs[0].shape) > 2 or axis not in {None, 0, 1}:
@@ -148,7 +152,7 @@ def graph_implementation(self,
 
 
 @wraps(Sum)
-def sum(expr, axis: Optional[int] = None, keepdims: bool = False):
+def sum(expr, axis: None | int | tuple[int, ...] = None, keepdims: bool = False):
     """
     Wrapper for Sum class.
     """
 
@@ -57,6 +57,9 @@ def trace(expr):
 class Trace(AffAtom):
     """The sum of the diagonal entries of a matrix.
 
+    Follows ``np.linalg.trace`` conventions: for an input with shape
+    ``(*batch, n, n)``, returns an expression with shape ``(*batch,)``.
+
     Parameters
     ----------
     expr : Expression
@@ -79,19 +82,21 @@ def sign_from_args(self) -> Tuple[bool, bool]:
     def numeric(self, values):
         """Sums the diagonal entries.
         """
-        return np.trace(values[0])
+        return np.linalg.trace(values[0])
 
     def validate_arguments(self) -> None:
-        """Checks that the argument is a square matrix.
+        """Checks that the argument is a square matrix (possibly batched).
         """
         shape = self.args[0].shape
-        if self.args[0].ndim != 2 or shape[0] != shape[1]:
-            raise ValueError("Argument to trace must be a 2-d square array.")
+        if self.args[0].ndim < 2 or shape[-2] != shape[-1]:
+            raise ValueError(
+                "Argument to trace must have ndim >= 2 with equal last two dimensions."
+            )
 
     def shape_from_args(self) -> Tuple[int, ...]:
-        """Always scalar.
+        """Scalar for 2D input, batch shape for ND input.
         """
-        return tuple()
+        return self.args[0].shape[:-2]
 
     def is_real(self) -> bool:
         return self.args[0].is_real() or self.args[0].is_hermitian()
@@ -128,4 +133,4 @@ def graph_implementation(
         tuple
             (LinOp for objective, list of constraints)
         """
-        return (lu.trace(arg_objs[0]), [])
+        return (lu.trace(arg_objs[0], shape), [])
@@ -57,22 +57,27 @@ def numeric(self, values):
         """
         Vectorize the strictly upper triangular entries.
         """
-        upper_idx = np.triu_indices(n=values[0].shape[0], k=1, m=values[0].shape[1])
-        return values[0][upper_idx]
+        n = values[0].shape[-1]
+        rows, cols = np.triu_indices(n, k=1)
+        return values[0][..., rows, cols]
 
     def validate_arguments(self) -> None:
-        """Checks that the argument is a square matrix.
+        """Checks that the argument is a square matrix with ndim >= 2.
         """
-        if not self.args[0].ndim == 2 or self.args[0].shape[0] != self.args[0].shape[1]:
+        shape = self.args[0].shape
+        if len(shape) < 2 or shape[-2] != shape[-1]:
             raise ValueError(
-                "Argument to upper_tri must be a 2-d square array."
+                "Argument to upper_tri must have ndim >= 2 with equal last two dimensions."
             )
 
-    def shape_from_args(self) -> Tuple[int, int]:
-        """A vector.
+    def shape_from_args(self) -> Tuple[int, ...]:
+        """Batch shape + vector of upper triangular entries.
         """
-        rows, cols = self.args[0].shape
-        return (rows*(cols-1)//2, 1)
+        shape = self.args[0].shape
+        n = shape[-1]
+        batch_shape = shape[:-2]
+        tri = n * (n - 1) // 2
+        return batch_shape + (tri, 1)
 
     def is_atom_log_log_convex(self) -> bool:
         """Is the atom log-log convex?
@@ -174,3 +179,30 @@ def upper_tri_to_full(n: int) -> sp.csc_array:
 
     # Construct and return the sparse matrix
     return sp.csc_array((values, (row_idx, col_idx)), shape=(n * n, entries))
+
+
+def batched_upper_tri_to_full(batch_size: int, n: int) -> sp.csc_array:
+    """
+    Returns a coefficient matrix that maps a vector of batch_size * tri entries
+    (F-order layout of (batch_size, tri)) to batch_size * n*n entries
+    (F-order layout of (batch_size, n, n)).
+
+    Uses Kronecker product kron(upper_tri_to_full(n), eye(batch_size)) because
+    F-order reshape interleaves batch elements.
+
+    Parameters
+    ----------
+    batch_size : int
+        The number of batch elements.
+    n : int
+        The dimension of the square matrix.
+
+    Returns
+    -------
+    sp.csc_array
+        The coefficient matrix.
+    """
+    single = upper_tri_to_full(n)
+    if batch_size == 1:
+        return single
+    return sp.csc_array(sp.kron(single, sp.eye(batch_size), format='csc'))
@@ -14,7 +14,7 @@
 limitations under the License.
 """
 
-from typing import List, Optional, Tuple
+from typing import Tuple
 
 import numpy as np
 import scipy.sparse as sp
@@ -23,15 +23,47 @@
 from cvxpy.atoms.atom import Atom
 
 
+def normalize_axis(
+    axis: int | tuple[int, ...], ndim: int, reduce_all_to_none: bool = True
+) -> None | int | tuple[int, ...]:
+    """Normalize an axis argument to a canonical form.
+
+    - Negative indices become positive.
+    - Single-element tuples become an int.
+    - If all axes are listed and *reduce_all_to_none* is True, returns None.
+    """
+    axes = normalize_axis_tuple(axis, ndim)
+    if reduce_all_to_none and len(axes) == ndim:
+        return None
+    elif len(axes) == 1:
+        return axes[0]
+    else:
+        return axes
+
+
 class AxisAtom(Atom):
     """
     An abstract base class for atoms that can be applied along an axis.
     """
 
-    def __init__(self, expr, axis: Optional[int] = None, keepdims: bool = False) -> None:
+    # Whether reducing over all axes is equivalent to axis=None.
+    # True for reduction atoms (sum, max, min, etc.).
+    # False for cumulative atoms (cumsum, cummax, cumprod) that preserve shape.
+    _reduce_all_axes_to_none = True
+
+    def __init__(
+        self, expr, axis: None | int | tuple[int, ...] = None, keepdims: bool = False
+    ) -> None:
         self.axis = axis
         self.keepdims = keepdims
         super(AxisAtom, self).__init__(expr)
+        # Normalize axis after init so self.args is available.
+        if self.axis is not None:
+            ndim = len(self.args[0].shape)
+            if ndim > 0:
+                self.axis = normalize_axis(
+                    self.axis, ndim, self._reduce_all_axes_to_none
+                )
 
     def shape_from_args(self) -> Tuple[int, ...]:
         """
@@ -75,12 +107,14 @@ def validate_arguments(self) -> None:
             _ = normalize_axis_tuple(axes, dim)
         super(AxisAtom, self).validate_arguments()
 
-    def _axis_grad(self, values) -> Optional[List[sp.csc_array]]:
+    def _axis_grad(self, values) -> list[sp.csc_array] | None:
         """
         Gives the (sub/super)gradient of the atom w.r.t. each argument.
 
         Matrix expressions are vectorized, so the gradient is a matrix.
-        Takes axis into account.
+        Takes axis into account. Works for any number of dimensions.
+
+        CVXPY convention: grad[i, j] = d(output_flat_F[j]) / d(input_flat_F[i])
 
         Args:
             values: A list of numeric values for the arguments.
@@ -93,33 +127,74 @@ def _axis_grad(self, values) -> Optional[List[sp.csc_array]]:
             D = self._column_grad(value)
             if D is not None:
                 D = sp.csc_array(D)
+            return [D]
+
+        input_shape = self.args[0].shape
+        ndim = len(input_shape)
+
+        # Normalize axis to tuple
+        axis = self.axis
+        axes = (axis,) if isinstance(axis, int) else tuple(axis)
+        keep = [i for i in range(ndim) if i not in axes]
+
+        reduce_dims = [input_shape[a] for a in axes]
+        reduce_size = int(np.prod(reduce_dims))
+        output_shape = tuple(input_shape[i] for i in keep)
+        input_size = int(np.prod(input_shape))
+        output_size = max(1, int(np.prod(output_shape)))
+
+        # F-order strides: stride[k] = prod(input_shape[:k])
+        f_strides = np.ones(ndim, dtype=int)
+        for k in range(1, ndim):
+            f_strides[k] = f_strides[k-1] * input_shape[k-1]
+
+        # Flat input in F-order
+        flat_input = values[0].ravel(order='F')
+
+        # All output multi-indices in F-order
+        if len(output_shape) == 0:
+            out_multis = np.zeros((0, 1), dtype=int)
         else:
-            m, n = self.args[0].shape
-            if self.axis == 0:  # function apply to each column
-                D = sp.csc_array((m*n, n), dtype=float)
-                for i in range(n):
-                    value = values[0][:, i]
-                    d = self._column_grad(value).T
-                    if d is None:
-                        return [None]
-                    else:
-                        d = np.array(d).flatten()
-                    row = np.linspace(i*m, i*m+m-1, m)  # [i*m, i*m+1, ..., i*m+m-1]
-                    col = np.ones((m))*i
-                    D = D + sp.csc_array((d, (row, col)),
-                                          shape=(m*n, n))  # d must be 1-D
-            else:  # function apply to each row
-                values = np.transpose(values[0])
-                D = sp.csc_array((m*n, m), dtype=float)
-                for i in range(m):
-                    value = values[:, i]
-                    d = self._column_grad(value).T
-                    if d is None:
-                        return [None]
-                    row = np.linspace(i, i+(n-1)*m, n)  # [0+i, m+i, ..., m(n-1)+i]
-                    col = np.ones((n))*i
-                    D = D + sp.csc_array((np.array(d)[0], (row, col)),
-                                          shape=(m*n, m))  # d must be 1-D
+            out_multis = np.array(
+                np.unravel_index(np.arange(output_size), output_shape, order='F')
+            )  # shape: (len(keep), output_size)
+
+        # All reduce-axis multi-indices
+        reduce_multis = np.array(
+            np.unravel_index(np.arange(reduce_size), reduce_dims)
+        ).T  # shape: (reduce_size, len(axes))
+
+        all_rows = []
+        all_cols = []
+        all_data = []
+
+        for j in range(output_size):
+            om = out_multis[:, j]
+
+            # Build input multi-indices: fix keep axes, vary reduce axes
+            in_multis = np.zeros((reduce_size, ndim), dtype=int)
+            for idx, k in enumerate(keep):
+                in_multis[:, k] = om[idx]
+            for idx, a in enumerate(axes):
+                in_multis[:, a] = reduce_multis[:, idx]
+
+            # Compute flat F-order indices for this fiber
+            fiber_indices = in_multis @ f_strides
+            fiber_values = flat_input[fiber_indices]
+
+            d = self._column_grad(fiber_values.reshape(-1, 1))
+            if d is None:
+                return [None]
+            d = np.asarray(d).flatten()
+
+            all_rows.append(fiber_indices)
+            all_cols.append(np.full(reduce_size, j, dtype=int))
+            all_data.append(d)
+
+        rows = np.concatenate(all_rows)
+        cols = np.concatenate(all_cols)
+        data = np.concatenate(all_data)
+        D = sp.csc_array((data, (rows, cols)), shape=(input_size, output_size))
         return [D]
 
     def _column_grad(self, value):
 
@@ -25,6 +25,8 @@ class cummax(AxisAtom):
     """Cumulative maximum.
     """
 
+    _reduce_all_axes_to_none = False
+
     def __init__(self, x, axis: int = 0) -> None:
         super(cummax, self).__init__(x, axis=axis)