
Commit 43abb67

Basic docs + API (#8)
* background
* update api + docs
1 parent fe32671 commit 43abb67

5 files changed: +301 -44 lines changed
Project.toml

Lines changed: 3 additions & 3 deletions
@@ -4,7 +4,9 @@ authors = ["Michael Klamkin <michael@klamkin.com> and contributors"]
 version = "1.0.0-DEV"
 
 [deps]
+DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63"
 Dualization = "191a621a-6537-11e9-281d-650236a99e60"
+ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 JuMP = "4076af6c-e467-56ae-b986-b466b2749572"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee"
@@ -17,8 +19,6 @@ MathOptSetDistances = "=0.2.11"
 
 [extras]
 Clarabel = "61c947e1-3e6d-4ee4-985a-eec8c727bd6e"
-DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63"
-ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
 HiGHS = "87dc4568-4c63-4d18-b0c0-bb2238e4078b"
 JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
@@ -27,4 +27,4 @@ Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Test", "HTTP", "JSON", "Clarabel", "HiGHS", "Pkg", "DifferentiationInterface", "ForwardDiff", "ParametricOptInterface"]
+test = ["Test", "HTTP", "JSON", "Clarabel", "HiGHS", "Pkg", "ParametricOptInterface"]

docs/src/index.md

Lines changed: 220 additions & 3 deletions
@@ -1,7 +1,224 @@
# L2ODLL.jl

-Documentation for L2ODLL.jl
-
!!! warning
    This documentation is a work in progress.
-   Please open an issue if content is missing / erroneous
+   Please open an issue if content is missing / erroneous.

L2ODLL.jl implements the Dual Lagrangian Learning (DLL) method of [Tanneau and Van Hentenryck (2024)](https://arxiv.org/pdf/2402.03086) using JuMP.

## Installation

```julia
import Pkg
Pkg.add(; url="https://github.com/LearningToOptimize/L2ODLL.jl")
```

## Usage

This package simplifies the implementation of DLL: it takes a primal JuMP model as input and automatically generates the dual projection and completion functions, which can then be used for training and inference of DLL models. The basic usage is as follows:

#### Define your (primal) model using JuMP

For the purposes of this example, we'll use a portfolio optimization problem.
```julia
using JuMP, LinearAlgebra

model = Model()

# define constant problem data
Σ = [166 34 58; 34 64 4; 58 4 100] / 100^2
N = size(Σ, 1)

# define variables
@variable(model, x[1:N])
set_lower_bound.(x, 0) # we explicitly set upper and lower bounds
set_upper_bound.(x, 1) # in order to use the BoundDecomposition

# define parametric problem data
μ0 = randn(N)
γ0 = rand()
@variable(model, μ[1:N] in MOI.Parameter.(μ0))
@variable(model, γ in MOI.Parameter(γ0))

# define constraints
@constraint(model, simplex, sum(x) == 1)
@constraint(model, risk, [γ; cholesky(Σ).L * x] in SecondOrderCone())

# define objective
@objective(model, Max, dot(μ, x))
```

#### Decompose and build the functions

Since all the variables have finite bounds, L2ODLL will automatically pick the `BoundDecomposition`.
```julia
using L2ODLL

L2ODLL.decompose!(model)
```

L2ODLL has now automatically generated the dual projection and completion layer. To compute the dual objective value and its gradient with respect to the prediction, use:
```julia
param_value = ... # some values for μ and γ
y_predicted = nn(param_value) # e.g. neural network inference

dobj = L2ODLL.dual_objective(model, y_predicted, param_value)
dobj_wrt_y = L2ODLL.dual_objective_gradient(model, y_predicted, param_value)
```

This also works with batches, using broadcasting:
```julia
dobj = L2ODLL.dual_objective.(model, y_predicted_batch, param_value_batch)
dobj_wrt_y = L2ODLL.dual_objective_gradient.(model, y_predicted_batch, param_value_batch)
```
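Purely as an illustration of how these two functions compose (this sketch is not part of the package), one can improve a candidate prediction by plain gradient ascent on the completed dual objective. Here `y0` and `p` are assumed to have the same shapes as `y_predicted` and `param_value` above.
```julia
# Illustrative only: refine a candidate prediction by gradient ascent
# on the completed dual objective (a larger value is a tighter dual bound).
function ascend(model, y0, p; steps=100, α=1e-2)
    y = y0
    for _ in 1:steps
        g = L2ODLL.dual_objective_gradient(model, y, p)  # gradient w.r.t. the prediction
        y = y .+ α .* g                                  # ascent step
    end
    return y, L2ODLL.dual_objective(model, y, p)
end
```
In DLL proper, this same gradient is instead backpropagated through the neural network that produces the prediction.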
!!! warning
    These functions currently run on the CPU. A batched, GPU-friendly version is coming soon.

## Math Background

### Decomposition

In DLL, the primal constraints (equivalently, their dual variables) are decomposed into a predicted set and a completed set.
Consider the primal-dual pair:
```math
\begin{equation}
\begin{aligned}
& \min\nolimits_{x} & c^\top x
\\
& \;\;\text{s.t.} & Ax + b \in \mathcal{C}
\\
& & x \in \mathbb{R}^n
\end{aligned}
\quad\quad\quad\quad
\begin{aligned}
& \max\nolimits_{y} & - b^\top y
\\
& \;\;\text{s.t.} & A^\top y = c
\\
& & y \in \mathcal{C}^*
\end{aligned}
\end{equation}
```
After the decomposition, we have:
```math
\begin{equation}
\begin{aligned}
& \min\nolimits_{x} & c^\top x
\\
& \;\;\text{s.t.} & Ax + b \in \mathcal{C}
\\
& \;\;\phantom{\text{s.t.}} & Hx + h \in \mathcal{K}
\\
& & x \in \mathbb{R}^n
\end{aligned}
\quad\quad\quad\quad
\begin{aligned}
& \max\nolimits_{y,z} & - b^\top y - h^\top z
\\
& \;\;\text{s.t.} & A^\top y + H^\top z = c
\\
& & y \in \mathcal{C}^*,\; z \in \mathcal{K}^*
\end{aligned}
\end{equation}
```

Then, the completion model, which recovers the completed variables $z$ from a predicted $y$, is:

```math
\begin{equation}
\begin{aligned}
& \max\nolimits_{z} & - h^\top z - b^\top y
\\
& \;\;\text{s.t.} & H^\top z = c - A^\top y
\\
& & z \in \mathcal{K}^*
\end{aligned}
\end{equation}
```

To train the neural network, we need the gradient of the optimal value with respect to the predicted $y$. This is $\nabla_y = -b - Ax$, where $x$ is the optimal dual multiplier of the affine constraint in the completion model. In the special cases below, we only give the expression for $x$ in this formula.

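As a quick check (a derivation sketch, not from the package), the formula follows from the envelope theorem applied to the completion model: with multiplier $x$ for its affine constraint, the Lagrangian and its partial derivative in $y$ are
```math
\mathcal{L}(z, x; y) = -h^\top z - b^\top y + x^\top \left( c - A^\top y - H^\top z \right),
\qquad
\nabla_y \mathcal{L}(z, x; y) = -b - Ax,
```
and the gradient of the optimal value with respect to $y$ equals this expression evaluated at an optimal primal-dual pair of the completion model.
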
#### Bounded Decomposition

When all primal variables have finite upper and lower bounds, a natural way to decompose the constraints is to have $z$ correspond to the bound constraints and $y$ correspond to the main constraints, i.e.

```math
\begin{equation}
\begin{aligned}
& \min\nolimits_{x} & c^\top x
\\
& \;\;\text{s.t.} & Ax + b \in \mathcal{C}
\\
& & l \leq x \leq u
\end{aligned}
\quad\quad\quad\quad
\begin{aligned}
& \max\nolimits_{y,z_l,z_u} & - b^\top y + l^\top z_l + u^\top z_u
\\
& \;\;\text{s.t.} & A^\top y + I z_l + I z_u = c
\\
& & y \in \mathcal{C}^*,\; z_l \in \mathbb{R}_+^n,\; z_u \in \mathbb{R}_-^n
\end{aligned}
\end{equation}
```

Then, the completion model is:

```math
\begin{equation}
\begin{aligned}
& \max\nolimits_{z_l,z_u} & l^\top z_l + u^\top z_u - b^\top y
\\
& \;\;\text{s.t.} & I z_l + I z_u = c - A^\top y
\\
& & z_l \in \mathbb{R}_+^n,\; z_u \in \mathbb{R}_-^n
\end{aligned}
\end{equation}
```

This model admits a closed-form solution: $z_l = [c - A^\top y]^+$ and $z_u = -[c - A^\top y]^-$, the element-wise positive and negative parts of the reduced costs. Furthermore, the $x$ that defines the (sub-)gradient is given element-wise by $l$ if $z_l > 0$, $u$ if $z_u < 0$, and any $x \in [l,u]$ otherwise.

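In code, this closed form is just a split of the reduced costs into their positive and negative parts. A minimal sketch (illustrative only, not the package's internals), assuming dense arrays `A`, `b`, `c`, `l`, `u` and a predicted `y` for a problem in the form above:
```julia
# Illustrative closed-form completion for the bound decomposition.
r  = c - A' * y                       # reduced costs for the predicted y
zl = max.(r, 0.0)                     # lower-bound multipliers (positive part)
zu = min.(r, 0.0)                     # upper-bound multipliers (negative part)
dobj = -b' * y + l' * zl + u' * zu    # completed dual objective

# element-wise primal recovery used in the (sub-)gradient ∇_y = -b - A*x;
# any value in [l, u] is valid where neither bound multiplier is active
x = ifelse.(zl .> 0, l, ifelse.(zu .< 0, u, clamp.(0.0, l, u)))
g = -b - A * x
```
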
#### (Strictly) Convex QP

In the convex QP case, the primal has a strictly convex quadratic objective, i.e. $Q \succ 0$. In that case it is natural to use the main constraints as the predicted set and to complete the quadratic-slack dual variables $z$.

```math
\begin{equation}
\begin{aligned}
& \min\nolimits_{x} & \tfrac{1}{2} x^\top Q x + c^\top x
\\
& \;\;\text{s.t.} & Ax + b \in \mathcal{C}
\\
& & x \in \mathbb{R}^n
\end{aligned}
\quad\quad\quad\quad
\begin{aligned}
& \max\nolimits_{y,z} & - b^\top y - \tfrac{1}{2} z^\top Q z
\\
& \;\;\text{s.t.} & A^\top y + Qz = c
\\
& & y \in \mathcal{C}^*,\; z \in \mathbb{R}^n
\end{aligned}
\end{equation}
```

Then, the completion model is:

```math
\begin{equation}
\begin{aligned}
& \max\nolimits_{z} & - \tfrac{1}{2} z^\top Q z - b^\top y
\\
& \;\;\text{s.t.} & Q z = c - A^\top y
\\
& & z \in \mathbb{R}^n
\end{aligned}
\end{equation}
```

This model admits a closed-form solution, $z = Q^{-1}(c - A^\top y)$. Furthermore, the $x$ that defines the gradient is given in closed form by $x = -z$.
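A minimal sketch of this completion (illustrative only; `Q`, `A`, `b`, `c` and the predicted `y` are assumed dense arrays, not the package's internal data structures):
```julia
using LinearAlgebra

# Illustrative closed-form completion for the strictly convex QP case.
r = c - A' * y
z = cholesky(Symmetric(Q)) \ r        # z = Q⁻¹(c - Aᵀy); Q ≻ 0, so the factorization exists
dobj = -b' * y - 0.5 * (z' * Q * z)   # completed dual objective
x = -z                                # multiplier of the completion's affine constraint
g = -b - A * x                        # gradient of the dual objective w.r.t. y
```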

src/L2ODLL.jl

Lines changed: 35 additions & 1 deletion
@@ -5,11 +5,15 @@ import JuMP
 import LinearAlgebra
 import MathOptSetDistances
 import SparseArrays
+import DifferentiationInterface
+import ForwardDiff
 
 const MOI = JuMP.MOI
 const MOIB = JuMP.MOIB
 const MOIU = JuMP.MOIU
 const MOSD = MathOptSetDistances
+const DI = DifferentiationInterface
+const ADTypes = DI.ADTypes
 
 abstract type AbstractDecomposition end # must have p_ref and y_ref
 
@@ -26,6 +30,30 @@ struct DLLCache
     decomposition::AbstractDecomposition
 end
 
+function decompose!(model::JuMP.Model, decomposition::AbstractDecomposition;
+    optimizer=nothing, proj_fn=nothing, dll_layer_builder=nothing
+)
+    return build_cache(model, decomposition; optimizer, proj_fn, dll_layer_builder)
+end
+
+function dual_objective(model::JuMP.Model, y_predicted, param_value)
+    cache = get_cache(model)
+    @assert length.(y_predicted) == L2ODLL.y_shape(cache)
+    return cache.dll_layer(y_predicted, param_value)
+end
+
+function dual_objective_gradient(model::JuMP.Model, y_predicted, param_value; ad_type::ADTypes.AbstractADType=DI.AutoForwardDiff())
+    cache = get_cache(model)
+    y_shape = L2ODLL.y_shape(cache)
+    @assert length.(y_predicted) == y_shape
+    dobj_wrt_y = DI.gradient(
+        (y,p) -> dual_objective(model, L2ODLL.unflatten_y(y, y_shape), p),
+        ad_type,
+        L2ODLL.flatten_y(y_predicted), DI.Constant(param_value)
+    )
+    return L2ODLL.unflatten_y(dobj_wrt_y, y_shape)
+end
+
 function build_cache(model::JuMP.Model, decomposition::AbstractDecomposition;
     optimizer=nothing, proj_fn=nothing, dll_layer_builder=nothing
 )
@@ -43,7 +71,13 @@ function build_cache(model::JuMP.Model, decomposition::AbstractDecomposition;
         jump_builder(decomposition, proj_fn, dual_model, optimizer)
     end
 
-    return DLLCache(proj_fn, dll_layer, dual_model, decomposition)
+    cache = DLLCache(proj_fn, dll_layer, dual_model, decomposition)
+    model.ext[:_L2ODLL_cache] = cache
+    return cache
+end
+
+function get_cache(model::JuMP.Model)
+    return model.ext[:_L2ODLL_cache]
 end
 
 function make_completion_model(cache::DLLCache)
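The `ad_type` keyword on `dual_objective_gradient` selects the DifferentiationInterface backend and defaults to ForwardDiff. Passing it explicitly looks like the following sketch (equivalent to omitting the keyword; other `ADTypes` backends could be swapped in if they are loaded):
```julia
import DifferentiationInterface as DI

dobj_wrt_y = L2ODLL.dual_objective_gradient(
    model, y_predicted, param_value;
    ad_type = DI.AutoForwardDiff(),  # the default backend
)
```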

src/layers/generic.jl

Lines changed: 1 addition & 1 deletion
@@ -16,7 +16,7 @@ function jump_builder(decomposition::AbstractDecomposition, proj_fn::Function, d
     JuMP.set_optimizer(completion_model, optimizer)
     silent && JuMP.set_silent(completion_model)
     completion_model.ext[:🔒] = ReentrantLock()
-    # TODO: use DiffOpt to define frule/rrule
+    # TODO: define frule/rrule using b-Ax
     # TODO: handle infeasibility?
     return (y_pred, param_value) -> begin
         lock(completion_model.ext[:🔒])
