Commit a78fc9d

Author: ArnaudDeza
Commit message: updated notes
1 parent: 4bcd764

File tree: 9 files changed, +494 -151 lines changed
Binary file not shown (241 KB).

class02/eq_constraints.tex

Lines changed: 73 additions & 25 deletions
@@ -5,7 +5,7 @@ \section{Constrained Optimization}
 % ==== Equality constraints: KKT, Newton vs. Gauss–Newton ====
 
 \begin{frame}{Equality-constrained minimization: geometry and conditions}
-\textbf{Problem.}; $\min_{x\in\mathbb{R}^n} f(x)\quad \text{s.t.}\quad C(x)=0, C:\mathbb{R}^n\to\mathbb{R}^m$.
+\textbf{Problem}; $\min_{x\in\mathbb{R}^n} f(x)\quad \text{s.t.}\quad C(x)=0, C:\mathbb{R}^n\to\mathbb{R}^m$.
 
 \medskip
 \textbf{Geometric picture.} At an optimum on the manifold $C(x)=0$, the negative gradient must lie in the tangent space:
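The geometric condition on this slide can be checked numerically. Below is a minimal Julia sketch on a toy problem of my own (minimize x1 + x2 on the unit circle; none of these names come from the repo): at the constrained minimizer, the projection of the gradient of f onto the tangent space of {x : C(x) = 0}, i.e. the null space of the constraint Jacobian, vanishes.

```julia
# Toy check of the geometric picture (illustrative only, not from this commit):
# at a constrained minimizer, grad f has no component in the tangent space of
# {x : C(x) = 0}, i.e. its projection onto null(Jc) is zero.
using LinearAlgebra

gradf(x) = [1.0, 1.0]                 # gradient of f(x) = x1 + x2
C(x)     = [x[1]^2 + x[2]^2 - 1.0]    # unit-circle constraint
Jc(x)    = [2x[1] 2x[2]]              # 1×2 constraint Jacobian

xstar = [-1/sqrt(2), -1/sqrt(2)]      # known minimizer of x1 + x2 on the circle

J     = Jc(xstar)
P_tan = I - J' * ((J * J') \ J)       # orthogonal projector onto null(Jc)

@show norm(C(xstar))                  # ≈ 0: the point is feasible
@show norm(P_tan * gradf(xstar))      # ≈ 0: no "along-the-surface" component
```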
@@ -33,6 +33,38 @@ \section{Constrained Optimization}
 
 
 
+\section{Constrained Optimization}
+
+% ==== Slide 1: Picture-first intuition ====
+\begin{frame}[t]{Equality constraints: picture first}
+\setbeamercovered{invisible}
+
+\textbf{Goal.} Minimize $f(x)$ while staying on the surface $C(x)=0$.
+
+\uncover<2->{\textbf{Feasible set as a surface.} Think of $C(x)=0$ as a smooth surface embedded in $\mathbb{R}^n$ (a manifold).}
+
+\uncover<3->{\textbf{Move without breaking the constraint.} Tangent directions are the “along-the-surface” moves that keep $C(x)$ unchanged to first order. Intuitively: tiny steps that slide on the surface.}
+
+\uncover<4->{\textbf{What must be true at the best point.} At $x^\star$, there is no downhill direction that stays on the surface. Equivalently, the usual gradient of $f$ has \emph{no component along the surface}.}
+
+\uncover<5->{\textbf{Normals enter the story.} If the gradient can’t point along the surface, it must point \emph{through} it—i.e., it aligns with a combination of the surface’s normal directions (one normal per constraint).}
+\end{frame}
+
+% ==== Slide 2: From picture to KKT ====
+\begin{frame}[t]{From the picture to KKT (equality case)}
+\setbeamercovered{invisible}
+
+\textbf{KKT conditions at a regular local minimum (equality only):}
+
+\uncover<1->{\textbf{1) Feasibility:} $C(x^\star)=0$. \emph{(We’re on the surface.)}}
+
+\uncover<2->{\textbf{2) Stationarity:} $\nabla f(x^\star) + J_C(x^\star)^{\!T}\lambda^\star = 0$. \emph{(The gradient is a linear combination of the constraint normals.)}}
+
+\uncover<3->{\textbf{Lagrangian viewpoint.} Define $L(x,\lambda)=f(x)+\lambda^{\!T}C(x)$. At a solution, $x^\star$ is a stationary point of $L$ w.r.t.\ $x$ (that’s the stationarity equation), while $C(x^\star)=0$ enforces feasibility.}
+
+\uncover<4->{\textbf{What the multipliers mean.} The vector $\lambda^\star$ tells how strongly each constraint “pushes back” at the optimum; it also measures sensitivity of the optimal value to small changes in the constraints.}
+
+\end{frame}
 
 
 \begin{frame}{KKT system for equalities (first-order necessary conditions)}
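The two KKT conditions added in this hunk are easy to verify on the same kind of toy problem. The Julia sketch below is my own illustration (the names gradf, Jc, lamstar are not from the repo): it recovers the multiplier from the stationarity equation and then checks the feasibility and stationarity residuals.

```julia
# Verify the equality-KKT conditions for: minimize x1 + x2  s.t.  x1^2 + x2^2 = 1.
# Illustrative sketch only; the known solution is x* = (-1/√2, -1/√2), λ* = 1/√2.
using LinearAlgebra

gradf(x) = [1.0, 1.0]
C(x)     = [x[1]^2 + x[2]^2 - 1.0]
Jc(x)    = [2x[1] 2x[2]]

xstar = [-1/sqrt(2), -1/sqrt(2)]

# Stationarity says gradf + Jc' * λ = 0, so λ solves Jc' * λ = -gradf (least squares).
lamstar = Jc(xstar)' \ (-gradf(xstar))           # ≈ [0.7071] = [1/√2]

@show norm(C(xstar))                             # 1) feasibility  ≈ 0
@show norm(gradf(xstar) + Jc(xstar)' * lamstar)  # 2) stationarity ≈ 0
```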
@@ -57,9 +89,12 @@ \section{Constrained Optimization}
 $$
 
 \textit{Notes.} This is a symmetric \emph{saddle-point} system; typical solves use block elimination (Schur complement) or sparse factorizations.
+\end{frame}
+
+
+
 
 
-\end{frame}
 
 \begin{frame}{Move to Julia Code}
 \begin{center}
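The Notes line above points at block elimination (Schur complement) for the saddle-point system. Here is a small Julia sketch of both solve routes on an equality-constrained quadratic program with made-up data; it is my own illustration, not the "Move to Julia Code" demo referenced on the next frame.

```julia
# Equality-constrained QP:  min 0.5*x'H*x + g'x  s.t.  A*x = b.
# Solve the KKT saddle-point system directly, then via the Schur complement.
using LinearAlgebra

n, m = 5, 2
Q = randn(n, n); H = Q' * Q + I        # SPD (1,1) block
g = randn(n)
A = randn(m, n)                        # full row rank (almost surely)
b = randn(m)

# 1) One symmetric indefinite solve of the full KKT matrix.
K = [H A'; A zeros(m, m)]
sol = K \ [-g; b]
x_kkt, lam_kkt = sol[1:n], sol[n+1:end]

# 2) Block elimination: S = A*H⁻¹*A' (Schur complement), then back-substitute.
Hg  = H \ g
HAt = H \ A'
S   = A * HAt
lam_schur = S \ (-(b + A * Hg))
x_schur   = -(Hg + HAt * lam_schur)

@show norm(x_kkt - x_schur)            # the two routes agree
@show norm(A * x_kkt - b)              # primal feasibility
```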
@@ -68,42 +103,55 @@ \section{Constrained Optimization}
 \end{frame}
 
 \begin{frame}{Numerical practice: Newton on KKT}
-\textbf{When it works best.}
-\begin{itemize}
-\item Near a regular solution with $J_{C}(x^\star)$ full row rank and positive-definite reduced Hessian.
-\item With a globalization (line search on a merit function) and mild regularization for robustness.
-\end{itemize}
-
-\textbf{Common safeguards.}
-\begin{itemize}
-\item \emph{Regularize} the $(1,1)$ block to ensure a good search direction (e.g., add $\beta I$).
-\item \emph{Merit/penalty} line search to balance feasibility vs.\ optimality during updates.
-\item \emph{Scaling} constraints to improve conditioning of the KKT system.
-\end{itemize}
+\setbeamercovered{invisible}
+
+
+\textbf{When it works best.}
+\begin{itemize}
+\item Near a regular solution with $J_{C}(x^\star)$ full row rank and positive-definite reduced Hessian.
+\item With a globalization (line search on a merit function) and mild regularization for robustness.
+\end{itemize}
+
+% --- Part 2: appears on the 2nd click only ---
+\uncover<2->{%
+\textbf{Common safeguards.}
+\begin{itemize}
+\item \emph{Regularize} the $(1,1)$ block to ensure a good search direction (e.g., add $\beta I$).
+\item \emph{Merit/penalty} line search to balance feasibility vs.\ optimality during updates.
+\item \emph{Scaling} constraints to improve conditioning of the KKT system.
+\end{itemize}
+}
+\end{frame}
 
 
-\end{frame}
+\begin{frame}{Gauss--Newton vs. full Newton on KKT}
 
-\begin{frame}{Gauss--Newton vs.\ full Newton on KKT}
-\textbf{Full Newton Hessian of the Lagrangian:} $\nabla_{xx}^2 L(x,\lambda) &= \hess f(x)+\sum_{i=1}^m \lambda_i\,\hess C_i(x)$
+\uncover<1->{
+\textbf{Full Newton Hessian of the Lagrangian:}\quad
+$\nabla_{xx}^2 L(x,\lambda) = \nabla^2 f(x) + \sum_{i=1}^m \lambda_i\, \nabla^2 C_i(x)$
+}
 
-\textbf{Gauss--Newton approximation:} drop the \emph{constraint-curvature} term $\sum_i \lambda_i,\hess C_i(x)$:
+\vspace{0.6em}
 
-\begin{align*}
-H_{\text{GN}}(x) &\approx \hess f(x).
+\uncover<2->{
+\textbf{Gauss--Newton approximation:} drop the \emph{constraint-curvature} term
+$\sum_{i=1}^m \lambda_i\, \nabla^2 C_i(x)$:
+\begin{align*}
+H_{\text{GN}}(x) &\approx \nabla^2 f(x).
 \end{align*}
+}
 
+\uncover<3->{
 \textbf{Trade-offs (high level).}
 \begin{itemize}
-\item \emph{Full Newton:} fewer iterations near the solution, but each step is costlier and can be less robust far from it.
-\item \emph{Gauss--Newton:} cheaper per step and often more stable; may need more iterations but wins in wall-clock on many problems.
+\item \emph{Full Newton:} fewer iterations near the solution, but each step is costlier and can be less robust far from it.
+\item \emph{Gauss--Newton:} cheaper per step and often more stable; may need more iterations but wins in wall-clock on many problems.
 \end{itemize}
-
-\textbf{Practice tip.} Start with GN (with line search); switch to full Newton (or add low-rank updates) as feasibility improves.
-
+}
 
 \end{frame}
 
+
 % ==== Inequalities & KKT: complementarity ====
 
 \begin{frame}{Inequality-constrained minimization and KKT}
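The two reworked frames above (Newton on KKT with safeguards, and Gauss--Newton vs. full Newton) fit together in a few lines of code. The sketch below is my own illustrative Julia loop, assuming a toy projection-onto-the-circle problem, a residual-norm backtracking line search as the merit rule, and hypothetical helper names (newton_kkt, Hgn, kkt_res); it is not code from this commit.

```julia
# Damped (Gauss--)Newton on the equality-KKT conditions for a toy problem:
# project x0 onto the unit circle, i.e. min 0.5*||x - x0||^2  s.t.  x1^2 + x2^2 = 1.
using LinearAlgebra

const x0 = [2.0, 1.0]
gradf(x) = x - x0
Hgn(x)   = Matrix(1.0I, 2, 2)          # Hessian of f; here also the Gauss--Newton Hessian
C(x)     = [x[1]^2 + x[2]^2 - 1.0]
Jc(x)    = [2x[1] 2x[2]]

# Stationarity and feasibility stacked into one KKT residual.
kkt_res(x, lam) = vcat(gradf(x) + Jc(x)' * lam, C(x))

function newton_kkt(x, lam; iters = 50, beta = 1e-8, tol = 1e-10)
    for _ in 1:iters
        r = kkt_res(x, lam)
        norm(r) < tol && break
        H = Hgn(x) + beta * I          # regularized (1,1) block
        # (full Newton would use Hgn(x) + 2*lam[1]*I here: constraint curvature added)
        J = Jc(x)
        K = [H J'; J zeros(1, 1)]      # saddle-point KKT matrix
        d = K \ (-r)                   # Newton step on the KKT system
        dx, dlam = d[1:2], d[3:3]
        alpha = 1.0                    # backtracking on the KKT residual norm
        while norm(kkt_res(x + alpha*dx, lam + alpha*dlam)) > (1 - 1e-4*alpha) * norm(r) && alpha > 1e-8
            alpha /= 2
        end
        x += alpha * dx
        lam += alpha * dlam
    end
    return x, lam
end

x, lam = newton_kkt([0.1, 0.9], [0.0])
@show x lam norm(C(x))                 # x ≈ x0 / norm(x0), the closest point on the circle
```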

class02/figures/log_barrier.png
31.6 KB / 60 KB (image previews not shown)

class02/figures/tri_paper.png
73.1 KB (image preview not shown)