
Commit 5382605

merge from master

2 parents: b16501b + 2fe6db1

27 files changed: +1784 -1200 lines

cvxpy/atoms/affine/binary_operators.py

Lines changed: 56 additions & 35 deletions
@@ -201,6 +201,12 @@ def _grad(self, values):
         """Gives the (sub/super)gradient of the atom w.r.t. each argument.
 
         Matrix expressions are vectorized, so the gradient is a matrix.
+        CVXPY convention: grad[i, j] = d(output[j]) / d(input[i])
+        Uses Fortran (column-major) ordering for vectorization.
+
+        For matrix multiplication C = X @ Y:
+        - grad_X = kron(Y, I_m) where m = X.shape[0]
+        - grad_Y = kron(I_n, X).T where n = Y.shape[1] (or 1 for vectors)
 
         Args:
             values: A list of numeric values for the arguments.
@@ -211,29 +217,37 @@ def _grad(self, values):
         if self.args[0].is_constant() or self.args[1].is_constant():
             return super(MulExpression, self)._grad(values)
 
-        # TODO(akshayka): Verify that the following code is correct for
-        # non-affine arguments.
-        X = values[0]
-        Y = values[1]
+        X = np.atleast_2d(values[0])
+        Y = np.atleast_2d(values[1])
 
-        DX_rows = self.args[0].size
-        cols = self.args[0].size
+        # Handle 1D shapes: promote to 2D for consistent Kronecker computation
+        x_shape = self.args[0].shape
+        y_shape = self.args[1].shape
 
         # dot product of two vectors with shape (n,)
-        if len(self.args[0].shape) == 1 and len(self.args[1].shape) == 1:
-            DX = sp.csc_array(Y.reshape(-1, 1))  # y as column vector
-            DY = sp.csc_array(X.reshape(-1, 1))  # x as column vector
+        if len(x_shape) == 1 and len(y_shape) == 1:
+            # For 1D @ 1D -> scalar: grad is simply the other vector
+            DX = sp.csc_array(values[1].reshape(-1, 1))
+            DY = sp.csc_array(values[0].reshape(-1, 1))
            return [DX, DY]
 
-        # DX = [diag(Y11), diag(Y12), ...]
-        #      [diag(Y21), diag(Y22), ...]
-        #      [   ...       ...     ...]
-        DX = sp.dok_array((DX_rows, cols))
-        for k in range(self.args[0].shape[0]):
-            DX[k::self.args[0].shape[0], k::self.args[0].shape[0]] = Y
-        DX = sp.csc_array(DX)
-        cols = 1 if len(self.args[1].shape) == 1 else self.args[1].shape[1]
-        DY = sp.block_diag([np.atleast_2d(X.T) for k in range(cols)], "csc")
+        # For matrix @ vector, Y is (k,) -> treat as (k, 1)
+        # Note: atleast_2d converts (k,) to (1, k), so we transpose to get (k, 1)
+        if len(y_shape) == 1:
+            Y = Y.T  # (1, k) from atleast_2d -> (k, 1)
+
+        # For vector @ matrix, X is (k,) -> treat as (1, k)
+        if len(x_shape) == 1:
+            X = X  # already (1, k) from atleast_2d
+
+        m = X.shape[0]  # rows of X
+        n = Y.shape[1]  # cols of Y
+
+        # grad_X = kron(Y, I_m) with shape (m*k, m*n)
+        DX = sp.kron(Y, sp.eye_array(m), format='csc')
+
+        # grad_Y = kron(I_n, X).T with shape (k*n, m*n)
+        DY = sp.kron(sp.eye_array(n), X, format='csc').T
 
         return [DX, DY]
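A quick standalone sanity check of the Kronecker identities used above (an illustrative sketch, not part of the commit; assumes NumPy plus SciPy >= 1.12 for sp.eye_array): for C = X @ Y we have vec_F(C) = kron(Y.T, I_m) @ vec_F(X) = kron(I_n, X) @ vec_F(Y), and transposing these Jacobians gives exactly grad_X = kron(Y, I_m) and grad_Y = kron(I_n, X).T.

import numpy as np
import scipy.sparse as sp

rng = np.random.default_rng(0)
m, k, n = 3, 4, 2
X = rng.standard_normal((m, k))
Y = rng.standard_normal((k, n))

DX = sp.kron(Y, sp.eye_array(m), format='csc')    # shape (m*k, m*n)
DY = sp.kron(sp.eye_array(n), X, format='csc').T  # shape (k*n, m*n)

# grad.T maps an F-order-vectorized input perturbation to the output one
dX = rng.standard_normal((m, k))
assert np.allclose((dX @ Y).flatten(order='F'), DX.T @ dX.flatten(order='F'))

dY = rng.standard_normal((k, n))
assert np.allclose((X @ dY).flatten(order='F'), DY.T @ dY.flatten(order='F'))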
@@ -461,27 +475,34 @@ def is_nsd(self) -> bool:
             (self.args[0].is_nsd() and self.args[1].is_psd())
 
     def _grad(self, values):
-        """Compute the gradient of elementwise multiplication w.r.t. each argument.
-
-        For z = x * y (elementwise), returns:
-        - dz/dx = diag(y)
-        - dz/dy = diag(x)
-
+        """Gives the (sub/super)gradient of elementwise multiply.
+
+        For z = multiply(x, y), we have z[i] = x[i] * y[i].
+        Gradient is diagonal: grad_x = diag(y), grad_y = diag(x).
+        CVXPY convention: grad[i, j] = d(output[j]) / d(input[i])
+
         Args:
-            values: A list of numeric values for the arguments [x, y].
-
+            values: A list of numeric values for the arguments.
+
         Returns:
-            A list of SciPy CSC sparse matrices [DX, DY].
+            A list of SciPy CSC sparse matrices or None.
         """
-        x = values[0]
-        y = values[1]
-        # Flatten in case inputs are not 1D
-        x = np.asarray(x).flatten(order='F')
-        y = np.asarray(y).flatten(order='F')
-        DX = sp.diags(y, format='csc')
-        DY = sp.diags(x, format='csc')
+        if self.args[0].is_constant() or self.args[1].is_constant():
+            return super(multiply, self)._grad(values)
+
+        X = values[0]
+        Y = values[1]
+
+        # Flatten in F-order for CVXPY convention
+        x_flat = np.asarray(X).flatten(order='F')
+        y_flat = np.asarray(Y).flatten(order='F')
+
+        # Gradient is diagonal: grad_x[i, i] = y[i], grad_y[i, i] = x[i]
+        DX = sp.diags(y_flat, format='csc')
+        DY = sp.diags(x_flat, format='csc')
+
         return [DX, DY]
-
+
     def _verify_hess_vec_args(self):
         x = self.args[0]
         y = self.args[1]

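The diagonal rule above can be checked the same way (illustrative sketch, not part of the commit): for z = multiply(X, Y), perturbing X by dX perturbs z by dX * Y, which under F-order vectorization is exactly diag(vec_F(Y)) applied to vec_F(dX).

import numpy as np
import scipy.sparse as sp

rng = np.random.default_rng(1)
X = rng.standard_normal((3, 2))
Y = rng.standard_normal((3, 2))

DX = sp.diags(Y.flatten(order='F'), format='csc')  # grad_x = diag(vec_F(Y))
dX = rng.standard_normal((3, 2))
assert np.allclose((dX * Y).flatten(order='F'), DX.T @ dX.flatten(order='F'))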
cvxpy/atoms/affine/cumsum.py

Lines changed: 70 additions & 74 deletions
@@ -13,40 +13,30 @@
 See the License for the specific language governing permissions and
 limitations under the License.
 """
-from typing import List, Tuple
+import warnings
+from typing import Optional, Tuple
 
 import numpy as np
 import scipy.sparse as sp
+from numpy.lib.array_utils import normalize_axis_index
 
-import cvxpy.lin_ops.lin_op as lo
-import cvxpy.lin_ops.lin_utils as lu
 from cvxpy.atoms.affine.affine_atom import AffAtom
-from cvxpy.atoms.affine.binary_operators import MulExpression
 from cvxpy.atoms.axis_atom import AxisAtom
-from cvxpy.constraints.constraint import Constraint
 from cvxpy.expressions.expression import Expression
-from cvxpy.expressions.variable import Variable
 
 
-def get_diff_mat(dim: int, axis: int) -> sp.csc_array:
-    """Return a sparse matrix representation of first order difference operator.
+def _sparse_triu_ones(dim: int) -> sp.csc_array:
+    """Create a sparse upper triangular matrix of ones.
 
-    Parameters
-    ----------
-    dim : int
-        The length of the matrix dimensions.
-    axis : int
-        The axis to take the difference along.
-
-    Returns
-    -------
-    sp.csc_array
-        A square matrix representing first order difference.
+    This avoids allocating a dense dim x dim matrix.
+    Used for cumsum gradient in CVXPY's convention: grad[i,j] = d(out[j])/d(in[i]).
     """
-    mat = sp.diags_array([np.ones(dim), -np.ones(dim - 1)], offsets=[0, -1],
-                         shape=(dim, dim),
-                         format='csc')
-    return mat if axis == 0 else mat.T
+    # Row i has entries at columns i, i+1, ..., dim-1
+    # So row 0 has dim entries, row 1 has dim-1, etc.
+    rows = np.repeat(np.arange(dim), np.arange(dim, 0, -1))
+    cols = np.concatenate([np.arange(i, dim) for i in range(dim)])
+    data = np.ones(len(rows))
+    return sp.csc_array((data, (rows, cols)), shape=(dim, dim))
 
 
 class cumsum(AffAtom, AxisAtom):
@@ -57,82 +47,88 @@ class cumsum(AffAtom, AxisAtom):
     ----------
     expr : CVXPY expression
         The expression being summed.
-    axis : int
-        The axis to sum across if 2D.
+    axis : int, optional
+        The axis to sum across. If None, the array is flattened before cumsum.
+        Note: NumPy's default is axis=None, while CVXPY defaults to axis=0.
     """
-    def __init__(self, expr: Expression, axis: int = 0) -> None:
+    def __init__(self, expr: Expression, axis: Optional[int] = 0) -> None:
         super(cumsum, self).__init__(expr, axis)
 
+    def validate_arguments(self) -> None:
+        """Validate axis, but handle 0D arrays specially."""
+        if self.args[0].ndim == 0:
+            if self.axis is not None:
+                warnings.warn(
+                    "cumsum on 0-dimensional arrays currently returns a scalar, "
+                    "but in a future CVXPY version it will return a 1-element "
+                    "array to match numpy.cumsum behavior. Additionally, only "
+                    "axis=0, axis=-1, or axis=None will be valid for 0D arrays.",
+                    FutureWarning
+                )
+        else:
+            super().validate_arguments()
+
     @AffAtom.numpy_numeric
     def numeric(self, values):
         """
         Returns the cumulative sum of elements of an expression over an axis.
         """
         return np.cumsum(values[0], axis=self.axis)
 
-    def validate_arguments(self):
-        if self.args[0].ndim > 2:
-            raise UserWarning(
-                "cumsum is only implemented for 1D or 2D arrays and might not "
-                "work as expected for higher dimensions."
-            )
-
     def shape_from_args(self) -> Tuple[int, ...]:
-        """The same as the input."""
+        """Flattened if axis=None, otherwise same as input."""
+        if self.axis is None:
+            return (self.args[0].size,)
         return self.args[0].shape
 
     def _grad(self, values):
         """Gives the (sub/super)gradient of the atom w.r.t. each argument.
 
         Matrix expressions are vectorized, so the gradient is a matrix.
+        CVXPY convention: grad[i, j] = d(output[j]) / d(input[i]).
 
         Args:
             values: A list of numeric values for the arguments.
 
         Returns:
             A list of SciPy CSC sparse matrices or None.
         """
-        dim = values[0].shape[self.axis]
-        mat = sp.csc_array(np.tril(np.ones((dim, dim))))
-        var = Variable(self.args[0].shape)
-        if self.axis == 0:
-            grad = MulExpression(mat, var)._grad(values)[1]
-        else:
-            grad = MulExpression(var, mat.T)._grad(values)[0]
-        return [grad]
+        ndim = len(values[0].shape)
+        axis = self.axis
+
+        # Handle axis=None: treat as 1D cumsum over C-order flattened array
+        if axis is None:
+            dim = values[0].size
+            # For cumsum with axis=None:
+            # - Input x is vectorized in F-order (CVXPY convention)
+            # - cumsum flattens in C-order then computes cumsum
+            # - Let x_f = F-order input, x_c = C-order = P @ x_f
+            # - y = L @ x_c = L @ P @ x_f (L is lower triangular)
+            # - dy/dx_f = L @ P
+            # - CVXPY wants grad[i,j] = dy[j]/dx_f[i] = (L @ P).T = P.T @ L.T = P.T @ U
+            #   where U is upper triangular
+            triu = _sparse_triu_ones(dim)
+            # Permutation: P @ f_vec = c_vec
+            c_order_indices = np.arange(dim).reshape(values[0].shape, order='F').flatten(order='C')
+            P = sp.csc_array((np.ones(dim), (np.arange(dim), c_order_indices)), shape=(dim, dim))
+            grad = P.T @ triu
+            return [sp.csc_array(grad)]
+
+        axis = normalize_axis_index(axis, ndim)
+        dim = values[0].shape[axis]
+
+        # Upper triangular matrix for CVXPY gradient convention
+        # grad[i, j] = d(cumsum[j])/d(x[i]) = 1 if i <= j
+        triu = _sparse_triu_ones(dim)
+
+        # Kronecker product: I_post ⊗ triu ⊗ I_pre
+        # This works for all dimensions including 1D and 2D
+        pre_size = int(np.prod(values[0].shape[:axis])) if axis > 0 else 1
+        post_size = int(np.prod(values[0].shape[axis+1:])) if axis < ndim - 1 else 1
+
+        grad = sp.kron(sp.kron(sp.eye_array(post_size), triu), sp.eye_array(pre_size))
+        return [sp.csc_array(grad)]
 
     def get_data(self):
         """Returns the axis being summed."""
         return [self.axis]
-
-    def graph_implementation(
-        self, arg_objs, shape: Tuple[int, ...], data=None
-    ) -> Tuple[lo.LinOp, List[Constraint]]:
-        """Cumulative sum via difference matrix.
-
-        Parameters
-        ----------
-        arg_objs : list
-            LinExpr for each argument.
-        shape : tuple
-            The shape of the resulting expression.
-        data :
-            Additional data required by the atom.
-
-        Returns
-        -------
-        tuple
-            (LinOp for objective, list of constraints)
-        """
-        # Implicit O(n) definition:
-        # X = Y[1:,:] - Y[:-1, :]
-        Y = lu.create_var(shape)
-        axis = data[0]
-        dim = shape[axis]
-        diff_mat = get_diff_mat(dim, axis)
-        diff_mat = lu.create_const(diff_mat, (dim, dim), sparse=True)
-        if axis == 0:
-            diff = lu.mul_expr(diff_mat, Y)
-        else:
-            diff = lu.rmul_expr(Y, diff_mat)
-        return (Y, [lu.create_eq(arg_objs[0], diff)])

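Both branches of the new cumsum gradient can be verified against numpy.cumsum in isolation (illustrative sketch, not part of the commit; sparse_triu_ones mirrors the helper added above, and SciPy >= 1.12 is assumed for sp.eye_array):

import numpy as np
import scipy.sparse as sp

def sparse_triu_ones(dim):
    rows = np.repeat(np.arange(dim), np.arange(dim, 0, -1))
    cols = np.concatenate([np.arange(i, dim) for i in range(dim)])
    return sp.csc_array((np.ones(len(rows)), (rows, cols)), shape=(dim, dim))

rng = np.random.default_rng(2)
x = rng.standard_normal((3, 4))

# Axis case: grad = kron(kron(I_post, triu), I_pre), and grad.T maps
# vec_F(x) to vec_F(cumsum(x, axis))
axis = 1
triu = sparse_triu_ones(x.shape[axis])
pre = int(np.prod(x.shape[:axis]))
post = int(np.prod(x.shape[axis + 1:]))
grad = sp.kron(sp.kron(sp.eye_array(post), triu), sp.eye_array(pre))
assert np.allclose(np.cumsum(x, axis=axis).flatten(order='F'),
                   grad.T @ x.flatten(order='F'))

# axis=None case: numpy flattens in C-order while CVXPY vectorizes in
# F-order, hence the permutation P with x_c = P @ x_f
dim = x.size
c_idx = np.arange(dim).reshape(x.shape, order='F').flatten(order='C')
P = sp.csc_array((np.ones(dim), (np.arange(dim), c_idx)), shape=(dim, dim))
grad_none = P.T @ sparse_triu_ones(dim)
assert np.allclose(np.cumsum(x), grad_none.T @ x.flatten(order='F'))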
cvxpy/atoms/affine/diff.py

Lines changed: 42 additions & 20 deletions
@@ -14,38 +14,60 @@
 limitations under the License.
 """
 
+from numpy.lib.array_utils import normalize_axis_index
+
 from cvxpy.expressions.expression import Expression
 
 
 def diff(x, k: int = 1, axis: int = 0):
-    """Vector of kth order differences.
+    """Computes kth order differences along the specified axis.
+
+    Takes in an array and returns an array with the kth order differences
+    along the given axis. The output shape is the same as the input except
+    the size along the specified axis is reduced by k.
 
-    Takes in a vector of length n and returns a vector
-    of length n-k of the kth order differences.
+    diff(x) returns the differences between adjacent elements along axis 0:
+    [x[1] - x[0], x[2] - x[1], ...]
 
-    diff(x) returns the vector of differences between
-    adjacent elements in the vector, that is
+    diff(x, 2) is the second-order differences, equivalently diff(diff(x))
 
-    [x[2] - x[1], x[3] - x[2], ...]
+    diff(x, 0) returns the array x unchanged
 
-    diff(x, 2) is the second-order differences vector,
-    equivalently diff(diff(x))
+    Parameters
+    ----------
+    x : Expression or array-like
+        Input array.
+    k : int, optional
+        The number of times values are differenced. Default is 1.
+    axis : int, optional
+        The axis along which the difference is taken. Default is 0.
+        Note: NumPy's np.diff uses axis=-1 as default.
 
-    diff(x, 0) returns the vector x unchanged
+    Returns
+    -------
+    Expression
+        The kth order differences along the specified axis.
     """
     x = Expression.cast_to_const(x)
-    if (axis == 1 and x.ndim < 2) or x.ndim == 0:
+
+    # Validate and normalize axis (handles negative indices)
+    if x.ndim == 0:
         raise ValueError("Invalid axis given input dimensions.")
-    elif axis == 1:
-        x = x.T
+    axis = normalize_axis_index(axis, x.ndim)
 
-    # Always test shape[0] because if axis == 1 x is transposed.
-    if k < 0 or k >= x.shape[0]:
+    # Validate k
+    if k < 0 or k >= x.shape[axis]:
         raise ValueError("Must have k >= 0 and X must have < k elements along "
                          "axis")
-    for i in range(k):
-        if x.ndim == 2:
-            x = x[1:, :] - x[:-1, :]
-        else:
-            x = x[1:] - x[:-1]
-    return x.T if axis == 1 else x
+
+    # Apply k iterations of first-order difference along axis
+    for _ in range(k):
+        slices_upper = [slice(None)] * x.ndim
+        slices_upper[axis] = slice(1, None)
+
+        slices_lower = [slice(None)] * x.ndim
+        slices_lower[axis] = slice(None, -1)
+
+        x = x[tuple(slices_upper)] - x[tuple(slices_lower)]
+
+    return x

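The slice-tuple pattern used above generalizes np.diff-style differencing to any axis without transposing; a standalone NumPy sketch follows (diff_along_axis is an illustrative name, not part of the commit):

import numpy as np

def diff_along_axis(x, k=1, axis=0):
    axis = axis % x.ndim  # normalize negative axes (normalize_axis_index also validates bounds)
    for _ in range(k):
        upper = [slice(None)] * x.ndim
        upper[axis] = slice(1, None)     # selects x[..., 1:, ...]
        lower = [slice(None)] * x.ndim
        lower[axis] = slice(None, -1)    # selects x[..., :-1, ...]
        x = x[tuple(upper)] - x[tuple(lower)]
    return x

a = np.arange(12.0).reshape(3, 4)
assert np.array_equal(diff_along_axis(a, k=1, axis=1), np.diff(a, n=1, axis=1))
assert np.array_equal(diff_along_axis(a, k=2, axis=0), np.diff(a, n=2, axis=0))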