 See the License for the specific language governing permissions and
 limitations under the License.
 """
-from typing import List, Tuple
+import warnings
+from typing import Optional, Tuple
 
 import numpy as np
 import scipy.sparse as sp
+from numpy.lib.array_utils import normalize_axis_index
 
-import cvxpy.lin_ops.lin_op as lo
-import cvxpy.lin_ops.lin_utils as lu
 from cvxpy.atoms.affine.affine_atom import AffAtom
-from cvxpy.atoms.affine.binary_operators import MulExpression
 from cvxpy.atoms.axis_atom import AxisAtom
-from cvxpy.constraints.constraint import Constraint
 from cvxpy.expressions.expression import Expression
-from cvxpy.expressions.variable import Variable
 
 
-def get_diff_mat(dim: int, axis: int) -> sp.csc_array:
-    """Return a sparse matrix representation of the first-order difference operator.
+def _sparse_triu_ones(dim: int) -> sp.csc_array:
+    """Create a sparse upper triangular matrix of ones.
 
-    Parameters
-    ----------
-    dim : int
-        The length of the matrix dimensions.
-    axis : int
-        The axis to take the difference along.
-
-    Returns
-    -------
-    sp.csc_array
-        A square matrix representing the first-order difference.
+    This avoids allocating a dense dim x dim matrix.
+    Used for the cumsum gradient in CVXPY's convention: grad[i, j] = d(out[j])/d(in[i]).
     """
-    mat = sp.diags_array([np.ones(dim), -np.ones(dim - 1)], offsets=[0, -1],
-                         shape=(dim, dim),
-                         format='csc')
-    return mat if axis == 0 else mat.T
+    # Row i has entries at columns i, i+1, ..., dim-1,
+    # so row 0 has dim entries, row 1 has dim - 1, etc.
+    rows = np.repeat(np.arange(dim), np.arange(dim, 0, -1))
+    cols = np.concatenate([np.arange(i, dim) for i in range(dim)])
+    data = np.ones(len(rows))
+    return sp.csc_array((data, (rows, cols)), shape=(dim, dim))
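
As a quick sanity check (not part of the diff), the helper should agree with the dense construction it replaces; `dim = 4` is an arbitrary test size:

    import numpy as np

    dim = 4
    sparse_version = _sparse_triu_ones(dim)
    dense_version = np.triu(np.ones((dim, dim)))  # the allocation being avoided
    assert np.array_equal(sparse_version.toarray(), dense_version)
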
 
 
 class cumsum(AffAtom, AxisAtom):
@@ -57,82 +47,88 @@ class cumsum(AffAtom, AxisAtom):
     ----------
     expr : CVXPY expression
         The expression being summed.
-    axis : int
-        The axis to sum across if 2D.
+    axis : int, optional
+        The axis to sum across. If None, the array is flattened before cumsum.
+        Note: NumPy's default is axis=None, while CVXPY defaults to axis=0.
     """
-    def __init__(self, expr: Expression, axis: int = 0) -> None:
+    def __init__(self, expr: Expression, axis: Optional[int] = 0) -> None:
         super(cumsum, self).__init__(expr, axis)
 
+    def validate_arguments(self) -> None:
+        """Validate axis, but handle 0D arrays specially."""
+        if self.args[0].ndim == 0:
+            if self.axis is not None:
+                warnings.warn(
+                    "cumsum on 0-dimensional arrays currently returns a scalar, "
+                    "but in a future CVXPY version it will return a 1-element "
+                    "array to match numpy.cumsum behavior. Additionally, only "
+                    "axis=0, axis=-1, or axis=None will be valid for 0D arrays.",
+                    FutureWarning
+                )
+        else:
+            super().validate_arguments()
+
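
For context (not part of the diff), a minimal sketch of the deprecation path, assuming this branch is installed as cvxpy; the variable name x is arbitrary:

    import warnings
    import cvxpy as cp

    x = cp.Variable()  # a 0D (scalar) expression
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        y = cp.cumsum(x)  # default axis=0 on a 0D input triggers the warning
    assert caught[0].category is FutureWarning
    assert y.shape == ()  # still scalar-shaped for now, per the warning text
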
     @AffAtom.numpy_numeric
     def numeric(self, values):
         """
         Returns the cumulative sum of elements of an expression over an axis.
         """
         return np.cumsum(values[0], axis=self.axis)
 
-    def validate_arguments(self):
-        if self.args[0].ndim > 2:
-            raise UserWarning(
-                "cumsum is only implemented for 1D or 2D arrays and might not "
-                "work as expected for higher dimensions."
-            )
-
     def shape_from_args(self) -> Tuple[int, ...]:
-        """The same as the input."""
+        """Flattened if axis=None, otherwise same as input."""
+        if self.axis is None:
+            return (self.args[0].size,)
         return self.args[0].shape
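
A quick illustration of the new shape rule (hypothetical shapes, not from the diff):

    import cvxpy as cp

    A = cp.Variable((2, 3))
    assert cp.cumsum(A, axis=0).shape == (2, 3)   # same shape as the input
    assert cp.cumsum(A, axis=None).shape == (6,)  # flattened, like numpy.cumsum
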
 
     def _grad(self, values):
         """Gives the (sub/super)gradient of the atom w.r.t. each argument.
 
         Matrix expressions are vectorized, so the gradient is a matrix.
+        CVXPY convention: grad[i, j] = d(output[j]) / d(input[i]).
 
         Args:
             values: A list of numeric values for the arguments.
 
         Returns:
             A list of SciPy CSC sparse matrices or None.
         """
-        dim = values[0].shape[self.axis]
-        mat = sp.csc_array(np.tril(np.ones((dim, dim))))
-        var = Variable(self.args[0].shape)
-        if self.axis == 0:
-            grad = MulExpression(mat, var)._grad(values)[1]
-        else:
-            grad = MulExpression(var, mat.T)._grad(values)[0]
-        return [grad]
+        ndim = len(values[0].shape)
+        axis = self.axis
+
+        # Handle axis=None: treat as a 1D cumsum over the C-order flattened array.
+        if axis is None:
+            dim = values[0].size
+            # For cumsum with axis=None:
+            # - Input x is vectorized in F-order (CVXPY convention).
+            # - cumsum flattens in C-order, then computes the cumulative sum.
+            # - Let x_f = F-order input, x_c = C-order = P @ x_f.
+            # - y = L @ x_c = L @ P @ x_f (L is lower triangular).
+            # - dy/dx_f = L @ P.
+            # - CVXPY wants grad[i, j] = dy[j]/dx_f[i] = (L @ P).T = P.T @ L.T = P.T @ U,
+            #   where U is upper triangular.
+            triu = _sparse_triu_ones(dim)
+            # Permutation: P @ f_vec = c_vec
+            c_order_indices = np.arange(dim).reshape(values[0].shape, order='F').flatten(order='C')
+            P = sp.csc_array((np.ones(dim), (np.arange(dim), c_order_indices)), shape=(dim, dim))
+            grad = P.T @ triu
+            return [sp.csc_array(grad)]
+
+        axis = normalize_axis_index(axis, ndim)
+        dim = values[0].shape[axis]
+
+        # Upper triangular matrix for CVXPY's gradient convention:
+        # grad[i, j] = d(cumsum[j])/d(x[i]) = 1 if i <= j.
+        triu = _sparse_triu_ones(dim)
+
+        # Kronecker product: I_post ⊗ triu ⊗ I_pre.
+        # This works for all dimensions, including 1D and 2D.
+        pre_size = int(np.prod(values[0].shape[:axis])) if axis > 0 else 1
+        post_size = int(np.prod(values[0].shape[axis + 1:])) if axis < ndim - 1 else 1
+
+        grad = sp.kron(sp.kron(sp.eye_array(post_size), triu), sp.eye_array(pre_size))
+        return [sp.csc_array(grad)]
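
As an aside (not part of the diff), both gradient branches can be checked by brute force against np.cumsum under F-order vectorization. This sketch assumes `_sparse_triu_ones` from above is importable; the test shape (2, 3) and the helper name `jacobian` are arbitrary:

    import numpy as np
    import scipy.sparse as sp

    x = np.arange(6.0).reshape(2, 3)
    n = x.size

    def jacobian(axis):
        # J[i, j] = d vec_F(cumsum(x, axis))[j] / d vec_F(x)[i]
        J = np.zeros((n, n))
        for i in range(n):
            e = np.zeros(n)
            e[i] = 1.0
            J[i] = np.ravel(np.cumsum(e.reshape(x.shape, order='F'), axis=axis), order='F')
        return J

    # Integer-axis branch: I_post (x) triu (x) I_pre, with pre=2, post=1 for axis=1.
    grad = sp.kron(sp.kron(sp.eye_array(1), _sparse_triu_ones(3)), sp.eye_array(2))
    assert np.allclose(grad.toarray(), jacobian(axis=1))

    # axis=None branch: P.T @ U, with P mapping the F-order vec to the C-order vec.
    c_idx = np.arange(n).reshape(x.shape, order='F').flatten(order='C')
    P = sp.csc_array((np.ones(n), (np.arange(n), c_idx)), shape=(n, n))
    assert np.allclose((P.T @ _sparse_triu_ones(n)).toarray(), jacobian(axis=None))
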
 
     def get_data(self):
         """Returns the axis being summed."""
         return [self.axis]
-
-    def graph_implementation(
-        self, arg_objs, shape: Tuple[int, ...], data=None
-    ) -> Tuple[lo.LinOp, List[Constraint]]:
-        """Cumulative sum via difference matrix.
-
-        Parameters
-        ----------
-        arg_objs : list
-            LinExpr for each argument.
-        shape : tuple
-            The shape of the resulting expression.
-        data :
-            Additional data required by the atom.
-
-        Returns
-        -------
-        tuple
-            (LinOp for objective, list of constraints)
-        """
-        # Implicit O(n) definition:
-        # X = Y[1:, :] - Y[:-1, :]
-        Y = lu.create_var(shape)
-        axis = data[0]
-        dim = shape[axis]
-        diff_mat = get_diff_mat(dim, axis)
-        diff_mat = lu.create_const(diff_mat, (dim, dim), sparse=True)
-        if axis == 0:
-            diff = lu.mul_expr(diff_mat, Y)
-        else:
-            diff = lu.rmul_expr(Y, diff_mat)
-        return (Y, [lu.create_eq(arg_objs[0], diff)])
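
For context on the removed graph_implementation (illustration only, not part of the diff): the old canonicalization defined Y = cumsum(X) implicitly through the first-order difference operator D, i.e. it constrained D @ Y == X, since D is the inverse of the running-sum matrix. A small numeric check of that identity, with D built exactly as the removed get_diff_mat(dim, axis=0) did:

    import numpy as np
    import scipy.sparse as sp

    dim = 5
    D = sp.diags_array([np.ones(dim), -np.ones(dim - 1)], offsets=[0, -1],
                       shape=(dim, dim), format='csc')
    x = np.random.default_rng(0).standard_normal(dim)
    assert np.allclose(D @ np.cumsum(x), x)  # D undoes the running sum
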