
Commit 1bcae8f

Author ArnaudDeza committed: initial slides
1 parent 9dce1fc commit 1bcae8f

File tree

8 files changed, +917 -0 lines changed

264 KB
Binary file not shown.

class02/SQP.tex

Lines changed: 123 additions & 0 deletions
@@ -0,0 +1,123 @@
\section{Sequential Quadratic Programming (SQP)}

% ------------------------------------------------
\begin{frame}{What is SQP?}
\textbf{Idea:} Solve a nonlinear, constrained problem by repeatedly solving a \emph{quadratic program (QP)} built from local models.\\[4pt]
\begin{itemize}
\item Linearize constraints; quadratic model of the Lagrangian/objective.
\item Each iteration: solve a QP to get a step \(d\), update \(x \leftarrow x + \alpha d\).
\item Strength: strong local convergence (often superlinear) with good Hessian info.
\end{itemize}
\end{frame}

% ------------------------------------------------
\begin{frame}{Target Problem (NLP)}
\[
\min_{x \in \R^n} \ f(x)
\quad
\text{s.t.}\quad
g(x)=0,\quad h(x)\le 0
\]
\begin{itemize}
\item \(f:\R^n\!\to\!\R\), \(g:\R^n\!\to\!\R^{m}\) (equalities), \(h:\R^n\!\to\!\R^{p}\) (inequalities).
\item KKT recap (at candidate optimum \(x^\star\)):
\[
\exists \ \lambda \in \R^{m},\ \mu \in \R^{p}_{\ge 0}:
\ \grad f(x^\star) + \nabla g(x^\star)^T\lambda + \nabla h(x^\star)^T \mu = 0,
\]
\[
g(x^\star)=0,\quad h(x^\star)\le 0,\quad \mu \ge 0,\quad \mu \odot h(x^\star) = 0.
\]
\end{itemize}
\end{frame}

% ------------------------------------------------
\begin{frame}{From NLP to a QP (Local Model)}
At iterate \(x_k\) with multipliers \((\lambda_k,\mu_k)\):\\[4pt]
\textbf{Quadratic model of the Lagrangian}
\[
m_k(d) = \ip{\grad f(x_k)}{d} + \tfrac{1}{2} d^T B_k d
\]
with \(B_k \approx \nabla^2_{xx}\Lag(x_k,\lambda_k,\mu_k)\).\\[6pt]
\textbf{Linearized constraints}
\[
g(x_k) + \nabla g(x_k)\, d = 0,\qquad
h(x_k) + \nabla h(x_k)\, d \le 0.
\]
\end{frame}

% ------------------------------------------------
\begin{frame}{The SQP Subproblem (QP)}
\[
\begin{aligned}
\min_{d \in \R^n}\quad & \grad f(x_k)^T d + \tfrac{1}{2} d^T B_k d \\
\text{s.t.}\quad & \nabla g(x_k)\, d + g(x_k) = 0, \\
& \nabla h(x_k)\, d + h(x_k) \le 0.
\end{aligned}
\]
\begin{itemize}
\item Solve QP \(\Rightarrow\) step \(d_k\) and updated multipliers \((\lambda_{k+1},\mu_{k+1})\).
\item Update \(x_{k+1} = x_k + \alpha_k d_k\) (line search or trust-region).
\end{itemize}
\end{frame}
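
% ------------------------------------------------
% Illustrative addition (not part of the original deck): for the equality-only
% case, the QP above reduces to a single symmetric linear solve. A minimal
% Julia sketch; the function name solve_eq_qp is hypothetical.
\begin{frame}[fragile]{Sketch: Equality-Only QP as One Linear Solve (Julia)}
\begin{verbatim}
using LinearAlgebra

# QP:  min  gradf'd + 0.5 d'B d   s.t.  A d + c = 0
# Its optimality conditions form one symmetric "KKT" (saddle-point) system:
#   [ B  A' ] [ d      ]   [ -gradf ]
#   [ A  0  ] [ lambda ] = [ -c     ]
function solve_eq_qp(gradf, B, A, c)
    n, m = length(gradf), size(A, 1)
    K = [B A'; A zeros(m, m)]        # KKT matrix
    sol = K \ [-gradf; -c]           # one factorization per SQP iteration
    return sol[1:n], sol[n+1:end]    # step d_k and new multipliers
end
\end{verbatim}
\end{frame}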
63+
64+
% ------------------------------------------------
65+
\begin{frame}{Algorithm Sketch (SQP)}
66+
\begin{enumerate}
67+
\item Start with \(x_0\), multipliers \((\lambda_0,\mu_0)\), and \(B_0 \succ 0\).
68+
\item Build QP at \(x_k\) with \(B_k\), linearized constraints.
69+
\item Solve QP \(\Rightarrow\) get \(d_k\), \((\lambda_{k+1},\mu_{k+1})\).
70+
\item Globalize: line search on merit or use filter/TR to choose \(\alpha_k\).
71+
\item Update \(x_{k+1} = x_k + \alpha_k d_k\), update \(B_{k+1}\) (e.g., BFGS).
72+
\end{enumerate}
73+
\end{frame}
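
% ------------------------------------------------
% Illustrative addition (not part of the original deck): step 5 mentions a BFGS
% update of B_k. A standard choice in SQP is Powell's damped BFGS, which keeps
% B_k positive definite even when the curvature s'y is negative. A minimal
% Julia sketch; the name damped_bfgs_update is hypothetical.
\begin{frame}[fragile]{Sketch: Damped BFGS Update for \(B_k\)}
\begin{verbatim}
using LinearAlgebra

# Powell-damped BFGS:  s = x_{k+1} - x_k,
#   y = grad_x L(x_{k+1}, lambda_{k+1}) - grad_x L(x_k, lambda_{k+1})
function damped_bfgs_update(B, s, y)
    Bs, sy = B * s, dot(s, y)
    sBs = dot(s, Bs)
    # Damping: if curvature s'y is too small, blend y toward B*s.
    theta = sy >= 0.2 * sBs ? 1.0 : 0.8 * sBs / (sBs - sy)
    r = theta * y + (1 - theta) * Bs        # damped secant vector
    return B - (Bs * Bs') / sBs + (r * r') / dot(s, r)
end
\end{verbatim}
\emph{With this damping, \(s^T r \ge 0.2\, s^T B_k s > 0\), so \(B_{k+1}\) stays positive definite.}
\end{frame}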

% ------------------------------------------------
\begin{frame}{Toy Example (Local Models)}
\textbf{Problem:}
\[
\min_{x\in\R^2} \ \tfrac{1}{2}\norm{x}^2
\quad \text{s.t.} \quad g(x)=x_1^2 + x_2 - 1 = 0,\ \ h(x)=x_2 - 0.2 \le 0.
\]
At \(x_k\), build QP with
\[
\grad f(x_k)=x_k,\quad B_k=I,\quad
\nabla g(x_k) = \begin{bmatrix} 2x_{k,1} & 1 \end{bmatrix},\
\nabla h(x_k) = \begin{bmatrix} 0 & 1 \end{bmatrix}.
\]
Solve for \(d_k\), then \(x_{k+1}=x_k+\alpha_k d_k\) (a Julia sketch follows).
\end{frame}
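
% ------------------------------------------------
% Illustrative addition (not part of the original deck and not from the course
% notebooks): a complete SQP loop for this toy problem. With a single
% inequality and B = I positive definite, the QP subproblem can be solved by a
% two-case active-set check. All names (qp_step, sqp_toy) are hypothetical.
\begin{frame}[fragile]{Sketch: SQP Loop on the Toy Problem (Julia)}
\begin{verbatim}
using LinearAlgebra

# Toy problem: min 0.5||x||^2  s.t.  g(x) = 0,  h(x) <= 0
f(x) = 0.5 * dot(x, x);      gradf(x) = x
g(x) = x[1]^2 + x[2] - 1.0;  Jg(x) = [2x[1] 1.0]   # 1x2 Jacobian
h(x) = x[2] - 0.2;           Jh(x) = [0.0 1.0]

# QP subproblem via a two-case active-set check (valid here: one
# inequality, B positive definite).
function qp_step(x, B)
    # Case 1: assume h inactive (mu = 0); equality-only KKT solve.
    K = [B Jg(x)'; Jg(x) zeros(1, 1)]
    sol = K \ [-gradf(x); -g(x)]
    d = sol[1:2]
    if (Jh(x) * d)[1] + h(x) <= 1e-12
        return d, sol[3], 0.0          # linearized h already satisfied
    end
    # Case 2: h active; add it as an equality (mu = sol[4] >= 0 confirms).
    A = [Jg(x); Jh(x)]
    K = [B A'; A zeros(2, 2)]
    sol = K \ [-gradf(x); -g(x); -h(x)]
    return sol[1:2], sol[3], sol[4]
end

function sqp_toy(x0; iters = 20)
    x = copy(x0)
    B = Matrix{Float64}(I, 2, 2)       # B_k = I (exact Hessian of f here)
    for _ in 1:iters
        d, lam, mu = qp_step(x, B)
        x += d                         # alpha = 1: full steps suffice here
        norm(d) < 1e-10 && break
    end
    return x
end

@show sqp_toy([2.0, 2.0])   # -> approx [0.894, 0.2]; h active at solution
\end{verbatim}
\end{frame}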

% ------------------------------------------------
\begin{frame}{Globalization: Making SQP Robust}
SQP is an important method, and many issues must be addressed to obtain an \textbf{efficient} and \textbf{reliable} implementation:
\begin{itemize}
\item Efficient solution of the linear systems at each Newton iteration (the block structure of the matrix can be exploited).
\item Quasi-Newton approximations to the Hessian.
\item Trust region, line search, etc.\ to improve robustness (e.g., TR: restrict \(\norm{d}\) to maintain model validity); a merit line-search sketch follows this slide.
\item Treatment of constraints (equality and inequality) during the iterative process.
\item Selection of a good starting guess for $\lambda$.
\end{itemize}
\end{frame}
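
% ------------------------------------------------
% Illustrative addition (not part of the original deck): a common line-search
% globalization uses the l1 merit function
%   phi(x) = f(x) + rho * ( |g(x)| + max(h(x), 0) ),
% trading off optimality against feasibility. A simplified Julia sketch
% reusing f, g, h from the toy-example sketch; the names merit and backtrack
% are hypothetical, and the acceptance test below is a crude stand-in for the
% usual Armijo sufficient-decrease condition on the merit function.
\begin{frame}[fragile]{Sketch: Backtracking on an \(\ell_1\) Merit Function}
\begin{verbatim}
# l1 merit: objective plus weighted infeasibility (rho should dominate
# the multiplier magnitudes; f, g, h as in the toy example).
merit(x; rho = 10.0) = f(x) + rho * (abs(g(x)) + max(h(x), 0.0))

# Shrink alpha until the merit value decreases (simplified test).
function backtrack(x, d; rho = 10.0, shrink = 0.5, alpha_min = 1e-8)
    alpha = 1.0
    while merit(x + alpha * d; rho = rho) > merit(x; rho = rho) &&
          alpha > alpha_min
        alpha *= shrink
    end
    return alpha
end
\end{verbatim}
\end{frame}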

% ------------------------------------------------
\begin{frame}{Final Takeaways on SQP}
\textbf{When SQP vs.\ Interior-Point?}
\begin{itemize}
\item \textbf{SQP}: strong local convergence; warm-start friendly; natural for NMPC.
\item \textbf{IPM}: very robust for large, strictly feasible problems; good for dense inequality sets.
\item In practice: both are valuable; choose to match problem structure and runtime needs.
\end{itemize}
\textbf{Takeaways of SQP}
\begin{itemize}
\item SQP = Newton-like method using a sequence of structured QPs.
\item Globalization (merit/filter/TR) makes it reliable from poor starts.
\item Excellent fit for control (NMPC/trajectory optimization) due to sparsity and warm starts.
\end{itemize}
\end{frame}

class02/eq_constraints.tex

Lines changed: 157 additions & 0 deletions
@@ -0,0 +1,157 @@
%\section{Part II -- Equality constraints: KKT, Newton vs. Gauss–Newton}
\section{Constrained Optimization}

% ==== Equality constraints: KKT, Newton vs. Gauss–Newton ====

\begin{frame}{Equality-constrained minimization: geometry and conditions}
\textbf{Problem.} $\min_{x\in\mathbb{R}^n} f(x)\quad \text{s.t.}\quad C(x)=0,\qquad C:\mathbb{R}^n\to\mathbb{R}^m$.

\medskip
\textbf{Geometric picture.} At an optimum on the manifold $C(x)=0$, the gradient must be orthogonal to the tangent space (no feasible descent direction remains):

$$
\grad f(x^\star)\ \perp\ \mathcal{T}_{x^\star}=\{p:\; J_C(x^\star)p=0\}.
$$

Equivalently, the gradient is a linear combination of constraint normals:

$$
\grad f(x^\star)+J_C(x^\star)^{\!T}\lambda^\star=0,\qquad C(x^\star)=0\quad(\lambda^\star\in\mathbb{R}^m).
$$

\medskip
\textbf{Lagrangian.} $L(x,\lambda)=f(x)+\lambda^{\!T}C(x)$.
\end{frame}

\begin{frame}{A nicer visual explanation/derivation of KKT conditions}
\begin{center}
Quick little whiteboard derivation
\end{center}
\end{frame}
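
% ------------------------------------------------
% Illustrative addition (not part of the original deck): a compressed version
% of the whiteboard argument, in case the slides are read without the lecture.
\begin{frame}{Whiteboard derivation, compressed (sketch)}
Take any smooth feasible curve $x(t)$ with $x(0)=x^\star$ and $C(x(t))=0$. Differentiating the constraint gives $J_C(x^\star)\,\dot{x}(0)=0$, so $\dot{x}(0)$ is an arbitrary tangent vector $p$. Optimality along the curve forces
$$
0=\tfrac{d}{dt}f(x(t))\Big|_{t=0}=\grad f(x^\star)^{\!T}p\qquad\text{for all }p\ \text{with } J_C(x^\star)p=0.
$$
Hence $\grad f(x^\star)\perp\ker J_C(x^\star)$, i.e.\ $\grad f(x^\star)\in\operatorname{range}\,J_C(x^\star)^{\!T}$, which (under full row rank of $J_C(x^\star)$) is exactly
$$
\grad f(x^\star)+J_C(x^\star)^{\!T}\lambda^\star=0\quad\text{for some }\lambda^\star\in\mathbb{R}^m.
$$
\end{frame}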

\begin{frame}{KKT system for equalities (first-order necessary conditions)}
\textbf{KKT (FOC).}

$$
\grad_x L(x,\lambda)=\grad f(x)+J_C(x)^{\!T}\lambda=0,\qquad \grad_\lambda L(x,\lambda)=C(x)=0.
$$

\textbf{Solve by Newton on KKT:} linearize both optimality and feasibility:

$$
\begin{bmatrix}
\hess f(x) + \sum_{i=1}^m \lambda_i\,\hess C_i(x) & J_C(x)^{\!T}\\[2pt]
J_C(x) & 0
\end{bmatrix}
\begin{bmatrix}\Delta x\\ \Delta\lambda\end{bmatrix}
=-
\begin{bmatrix}
\grad f(x)+J_C(x)^{\!T}\lambda\\ C(x)
\end{bmatrix}.
$$

\textit{Notes.} This is a symmetric \emph{saddle-point} system; typical solves use block elimination (Schur complement) or sparse factorizations.
\end{frame}

\begin{frame}{Move to Julia Code}
\begin{center}
\textbf{Quick Demo of Julia Notebook: part2\_eq\_constraints.ipynb}
\end{center}
(An illustrative stand-alone sketch follows on the next slide.)
\end{frame}
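
% ------------------------------------------------
% Illustrative addition (not part of the original deck, and not necessarily
% what the notebook contains): a self-contained Newton-on-KKT iteration for a
% single equality constraint, including the constraint-curvature term
% lambda * hess C. The name newton_kkt is hypothetical.
\begin{frame}[fragile]{Sketch: Newton on the KKT System (Julia)}
\begin{verbatim}
using LinearAlgebra

# min 0.5||x||^2  s.t.  C(x) = x1^2 + x2 - 1 = 0
C(x)     = x[1]^2 + x[2] - 1.0
gradC(x) = [2x[1], 1.0]                 # constraint gradient
HC       = [2.0 0.0; 0.0 0.0]           # hess C (constant here)

function newton_kkt(x, lam; iters = 10)
    for _ in 1:iters
        H = I + lam * HC                # hess_xx L = hess f + lam*hess C
        K = [H gradC(x); gradC(x)' 0.0] # saddle-point KKT matrix
        r = [x + lam * gradC(x); C(x)]  # KKT residual (grad f = x here)
        step = -(K \ r)                 # Newton step on (x, lambda)
        x, lam = x + step[1:2], lam + step[3]
        norm(r) < 1e-12 && break
    end
    return x, lam
end

@show newton_kkt([2.0, 2.0], 0.0)  # -> approx ([0.7071, 0.5], -0.5)
\end{verbatim}
\end{frame}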

\begin{frame}{Numerical practice: Newton on KKT}
\textbf{When it works best.}
\begin{itemize}
\item Near a regular solution with $J_{C}(x^\star)$ full row rank and positive-definite reduced Hessian.
\item With a globalization (line search on a merit function) and mild regularization for robustness.
\end{itemize}

\textbf{Common safeguards.}
\begin{itemize}
\item \emph{Regularize} the $(1,1)$ block to ensure a good search direction (e.g., add $\beta I$).
\item \emph{Merit/penalty} line search to balance feasibility vs.\ optimality during updates.
\item \emph{Scaling} constraints to improve conditioning of the KKT system.
\end{itemize}
\end{frame}

\begin{frame}{Gauss--Newton vs.\ full Newton on KKT}
\textbf{Full Newton Hessian of the Lagrangian:} $\nabla_{xx}^2 L(x,\lambda) = \hess f(x)+\sum_{i=1}^m \lambda_i\,\hess C_i(x)$

\textbf{Gauss--Newton approximation:} drop the \emph{constraint-curvature} term $\sum_i \lambda_i\,\hess C_i(x)$:

\begin{align*}
H_{\text{GN}}(x) &\approx \hess f(x).
\end{align*}

\textbf{Trade-offs (high level).}
\begin{itemize}
\item \emph{Full Newton:} fewer iterations near the solution, but each step is costlier and can be less robust far from it.
\item \emph{Gauss--Newton:} cheaper per step and often more stable; may need more iterations but wins in wall-clock time on many problems.
\end{itemize}

\textbf{Practice tip.} Start with GN (with line search); switch to full Newton (or add low-rank updates) as feasibility improves.
\end{frame}

% ==== Inequalities & KKT: complementarity ====

\begin{frame}{Inequality-constrained minimization and KKT}
\textbf{Problem.} $\quad \min f(x)\quad\text{s.t.}\quad c(x)\ge 0, \qquad c:\mathbb{R}^n\to\mathbb{R}^p$.

\textbf{KKT conditions (first-order).}

$$
\begin{aligned}
&\text{Stationarity:} && \grad f(x)-J_c(x)^{\!T}\lambda=0,\\
&\text{Primal feasibility:} && c(x)\ge 0,\\
&\text{Dual feasibility:} && \lambda\ge 0,\\
&\text{Complementarity:} && \lambda^{\!T}c(x)=0\quad(\text{i.e., }\lambda_i c_i(x)=0\ \forall i).
\end{aligned}
$$

\textbf{Interpretation.}
\begin{itemize}
\item \emph{Active} constraints: $c_i(x)=0 \Rightarrow \lambda_i$ may be positive (the constraint acts like an equality).
\item \emph{Inactive} constraints: $c_i(x)>0 \Rightarrow \lambda_i=0$ (no influence on optimality).
\end{itemize}
\end{frame}

\begin{frame}{Complementarity in plain English (and why Newton is tricky)}
\footnotesize

\textbf{What $\lambda_i c_i(x)=0$ means.}
\begin{itemize}
\item Tight constraint ($c_i=0$) $\Rightarrow$ can press back ($\lambda_i\ge0$).
\item Loose constraint ($c_i>0$) $\Rightarrow$ no force ($\lambda_i=0$).
\end{itemize}

\textbf{Why naive Newton fails.}
\begin{itemize}
\item Complementarity = nonsmooth + inequalities ($\lambda\ge0$, $c(x)\ge0$).
\item Equality-style Newton can violate nonnegativity or bounce across the boundary.
\end{itemize}

\textbf{Two main strategies (preview).}
\begin{itemize}
\item \emph{Active-set:} guess actives $\Rightarrow$ solve equality-constrained subproblem, update set.
\item \emph{Barrier/PDIP/ALM:} smooth or relax complementarity, damped Newton, drive relaxation $\to 0$ (see the sketch after this slide).
\end{itemize}
\end{frame}
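
% ------------------------------------------------
% Illustrative addition (not part of the original deck): one standard way to
% smooth complementarity is an NCP function such as Fischer--Burmeister,
%   phi(a, b) = a + b - sqrt(a^2 + b^2),
% which satisfies phi(a, b) = 0  <=>  a >= 0, b >= 0, ab = 0. Replacing each
% pair (lambda_i, c_i(x)) by phi(lambda_i, c_i(x)) = 0 turns the KKT system
% into (semi)smooth equations amenable to damped Newton. A tiny Julia check:
\begin{frame}[fragile]{Sketch: Smoothing Complementarity (Fischer--Burmeister)}
\begin{verbatim}
# Fischer-Burmeister NCP function: zero exactly on the complementarity set.
phi(a, b) = a + b - sqrt(a^2 + b^2)

phi(0.0, 3.0)   # =  0.0    (inactive constraint: lambda = 0, c > 0)
phi(2.0, 0.0)   # =  0.0    (active constraint:   lambda > 0, c = 0)
phi(1.0, 1.0)   # =  0.586  (violation: both positive, product nonzero)
phi(-1.0, 2.0)  # = -1.236  (violation: negative multiplier)
\end{verbatim}
\emph{A smoothed variant $\varphi_\tau(a,b)=a+b-\sqrt{a^2+b^2+2\tau}$ corresponds to the relaxed condition $\lambda_i c_i=\tau$ (an interior-point central path); drive $\tau\to 0$.}
\end{frame}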
