Commit bd4253b

add example docs
1 parent e059641 commit bd4253b

File tree

4 files changed: +266 -5 lines changed

examples/blind_deconv.py

Lines changed: 72 additions & 2 deletions
@@ -38,16 +38,78 @@ def _():
     return BiconvexProblem, cp, mo, np, plt


+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    ## Introduction
+
+    Blind deconvolution is a technique for recovering a sharp signal or image from a blurred observation when the blur itself is unknown.
+    It jointly estimates the original signal and the blur kernel, using prior knowledge about their structure.
+
+    Suppose we are given a data vector $d \in \mathbf{R}^{m + n - 1}$, which is the convolution of an unknown sparse signal $x \in \mathbf{R}^n$ and an unknown smooth vector $y \in \mathbf{R}^m$ with bounded $\ell_\infty$-norm (i.e., bounded largest entry).
+    Additionally, we have the prior knowledge that both the vectors $x$ and $y$ are nonnegative.
+    The corresponding blind deconvolution problem can be formulated as the following biconvex optimization problem:
+
+    \[
+    \begin{array}{ll}
+    \text{minimize} & {\|x \otimes y - d\|}_2^2 + \alpha_{\rm sp} {\|x\|}_1 + \alpha_{\rm sm} {\|Dy\|}_2^2\\
+    \text{subject to} & x \succeq 0,\quad y \succeq 0\\
+    & {\|y\|}_\infty \leq \beta
+    \end{array}
+    \]
+
+    with variables $x$ and $y$, where $\alpha_{\rm sp}, \alpha_{\rm sm} > 0$ are the regularization parameters for the sparsity of $x$ and the smoothness of $y$, respectively, and $\beta > 0$ is the bound on the $\ell_\infty$-norm of the vector $y$.
+    The matrix $D \in \mathbf{R}^{(m - 1) \times m}$ is the first-order difference operator, given by
+
+    \[
+    D = \left[\begin{array}{ccccc}
+    1 & -1 &&&\\
+    & 1 & -1 &&\\
+    && \ddots & \ddots &\\
+    &&& 1 & -1
+    \end{array}\right] \in \mathbf{R}^{(m - 1) \times m},
+    \]
+
+    so that $Dy$ is the vector of successive differences of $y$.
+    The convolution $x \otimes y$ of the vectors $x$ and $y$ is given by
+
+    \[
+    {(x \otimes y)}_k = \sum_{i + j = k} x_i y_j,\quad k = 1, \ldots, m + n - 1.
+    \]
+    """)
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    ## Generate problem data
+    """)
+    return
+
+
 @app.cell
-def _(BiconvexProblem, conv, cp, np):
+def _(np):
     n = 120
     m = 40

     x0 = np.zeros(n)
     x0[6] = 1
     y0 = np.exp(-np.square(np.linspace(-2, 2, m)) * 2)
     d = np.convolve(x0, y0)
+    return d, m, n, x0, y0
+

+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    ## Specify and solve the problem
+    """)
+    return
+
+
+@app.cell
+def _(BiconvexProblem, conv, cp, d, m, n):
     alpha_sp = 0.1
     alpha_sm = 0.2
     beta = 1
@@ -58,7 +120,15 @@ def _(BiconvexProblem, conv, cp, np):
     constr = [cp.norm(y, "inf") <= beta]
     prob = BiconvexProblem(cp.Minimize(obj), [[x], [y]], constr)
     prob.solve(cp.CLARABEL, gap_tolerance=1e-5, max_iter=200)
-    return d, x, x0, y, y0
+    return x, y
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    ## Plot the results
+    """)
+    return


 @app.cell
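
To make the two operators in the formulation above concrete, here is a small NumPy check of the difference matrix $D$ and the convolution definition. It is an illustrative sketch only, not part of the committed notebook, and the variable names are hypothetical:

```python
import numpy as np

# Illustrative check of the operators used in the blind deconvolution
# formulation above (not part of the committed notebook).
n, m = 6, 4
x = np.random.rand(n)
y = np.random.rand(m)

# First-order difference operator D in R^{(m-1) x m}: (D y)_i = y_i - y_{i+1}.
D = np.eye(m - 1, m) - np.eye(m - 1, m, k=1)
assert np.allclose(D @ y, y[:-1] - y[1:])

# Convolution (x ⊗ y)_k = sum_{i + j = k} x_i y_j, k = 1, ..., m + n - 1,
# written with 0-based indices; it coincides with NumPy's full convolution.
xy = np.array([
    sum(x[i] * y[k - i] for i in range(n) if 0 <= k - i < m)
    for k in range(m + n - 1)
])
assert np.allclose(xy, np.convolve(x, y))
```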

examples/dict_learning.py

Lines changed: 50 additions & 1 deletion
@@ -38,14 +38,55 @@ def _():
     return BiconvexProblem, cp, mo, np, plt


+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    ## Introduction
+
+    We consider the sparse dictionary learning problem: find a dictionary matrix $D \in \mathbf{R}^{m \times k}$ and a sparse code matrix $X \in \mathbf{R}^{k \times n}$ such that the data matrix $Y \in \mathbf{R}^{m \times n}$ is well approximated by the product $DX$, where $X$ is sparse and $D$ has bounded Frobenius norm.
+    The dictionary learning problem can be formulated as the following biconvex optimization problem:
+
+    \[
+    \begin{array}{ll}
+    \text{minimize} & {\|DX - Y\|}_F^2 + \alpha {\|X\|}_1\\
+    \text{subject to} & {\|D\|}_F \leq \beta
+    \end{array}
+    \]
+
+    with variables $D$ and $X$, where $\alpha > 0$ is the sparsity regularization parameter and $\beta > 0$ is the bound on the Frobenius norm of the dictionary matrix.
+    """)
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    ## Generate problem data
+    """)
+    return
+
+
 @app.cell
-def _(BiconvexProblem, cp, np):
+def _(np):
     m = 10
     n = 20
     k = 20
     beta = 1

     Y = np.random.randn(m, n)
+    return Y, beta, k, m, n
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    ## Specify and solve the problem
+    """)
+    return
+
+
+@app.cell
+def _(BiconvexProblem, Y, beta, cp, k, m, n, np):
     D = cp.Variable((m, k))
     X = cp.Variable((k, n))
     alpha = cp.Parameter(nonneg=True)
@@ -64,6 +105,14 @@ def _(BiconvexProblem, cp, np):
     return cards, errs


+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    ## Plot the results
+    """)
+    return
+
+
 @app.cell
 def _(cards, errs, plt):
     fig, axs = plt.subplots(1, 1, figsize=(4, 3))
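
The committed cell solves this problem with the `BiconvexProblem` helper. As a point of reference, the same formulation can be attacked by plain alternating convex minimization, since each block of variables enters the objective convexly when the other block is fixed. The sketch below is an assumption-laden illustration (the hyperparameters and iteration count are arbitrary), not the notebook's implementation:

```python
import cvxpy as cp
import numpy as np

# Illustrative alternating minimization for the dictionary learning problem
# above; hyperparameters and iteration count are arbitrary choices.
np.random.seed(0)
m, n, k, alpha, beta = 10, 20, 20, 0.1, 1.0
Y = np.random.randn(m, n)

D_val = np.random.randn(m, k)
D_val *= beta / np.linalg.norm(D_val, "fro")  # feasible starting dictionary
for _ in range(20):
    # X-step: with D fixed, the problem is convex in X.
    X = cp.Variable((k, n))
    cp.Problem(cp.Minimize(
        cp.sum_squares(D_val @ X - Y) + alpha * cp.sum(cp.abs(X))
    )).solve()
    # D-step: with X fixed, the problem is convex in D.
    D = cp.Variable((m, k))
    cp.Problem(cp.Minimize(cp.sum_squares(D @ X.value - Y)),
               [cp.norm(D, "fro") <= beta]).solve()
    D_val = D.value
```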

examples/iohmm.py

Lines changed: 96 additions & 2 deletions
@@ -1,12 +1,14 @@
 import marimo

-__generated_with = "0.17.3"
+__generated_with = "0.17.6"
 app = marimo.App(width="medium")


 @app.cell(hide_code=True)
 def _(mo):
-    mo.md(r"""# Fitting Input-output Hidden Markov Models""")
+    mo.md(r"""
+    # Fitting Input-output Hidden Markov Models
+    """)
     return


@@ -36,6 +38,74 @@ def _():
     return BiconvexRelaxProblem, cp, mo, np, plt


+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    ## Introduction
+
+    We consider the problem of fitting a logistic input-output hidden Markov model (IO-HMM) to a dataset.
+    Suppose we are given a dataset $(x(t), y(t))$, $t = 1, \ldots, m$, where each sample consists of an input feature vector $x(t) \in \mathbf{R}^n$ and an output label $y(t) \in \{0, 1\}$, generated from a $K$-state IO-HMM according to the following procedure:
+    Let $\hat{z}(t) \in \{1, \ldots, K\}$, $t = 1, \ldots, m$, be the state labels of the IO-HMM with initial state distribution $p_{\rm init} \in \mathbf{R}^K$ with $\mathbf{1}^T p_{\rm init} = 1$ and transition matrix $P_{\rm tr} \in \mathbf{R}^{K \times K}$ with $P_{\rm tr} \mathbf{1} = \mathbf{1}$.
+    At time step $t$, the state label $\hat{z}(t)$ is sampled according to
+
+    \[
+    \hat{z}(t) \sim \left\{
+    \begin{array}{ll}
+    {\rm Cat}(p_{\rm init}) & t = 1\\
+    {\rm Cat}(p_{\hat{z}(t - 1)}) & t > 1,
+    \end{array}\right.
+    \]
+
+    where the vector $p_{\hat{z}(t-1)} \in \mathbf{R}^K$ denotes the $\hat{z}(t-1)$th row of the matrix $P_{\rm tr}$, and ${\rm Cat}(p)$ denotes the categorical distribution with $p$ being the vector of category probabilities.
+    Then, given the feature vector $x(t) \in \mathbf{R}^n$, the output $y(t) \in \{0, 1\}$ of this IO-HMM at time step $t$ is generated from a logistic model, i.e.,
+
+    \[
+    \mathop{\bf prob}(y(t) = 1) = \frac{1}{1 + \exp(-{x(t)}^T \theta_{\hat{z}(t)})},
+    \]
+
+    where $\theta_{\hat{z}(t)} \in \{\theta_1, \ldots, \theta_K\} \subseteq \mathbf{R}^n$ is the coefficient vector associated with the state $\hat{z}(t)$.
+
+    We are interested in recovering the transition matrix $P_{\rm tr}$, the model parameters $\theta_1, \ldots, \theta_K$, and the unobserved state labels $\hat{z}(1), \ldots, \hat{z}(m)$, given the dataset $(x(t), y(t))$, $t = 1, \ldots, m$.
+    Noting that the transition matrix $P_{\rm tr}$ can easily be estimated from the state labels $\hat{z}(t)$, $t = 1, \ldots, m$, we consider the following biconvex optimization problem for fitting the IO-HMM:
+
+    \[
+    \begin{array}{ll}
+    \text{minimize} & -\sum_{t = 1}^{m} {z(t)}^T {\left(y(t){x(t)}^T \theta_k - \log(1 + \exp({x(t)}^T \theta_k))\right)}_{k = 1}^K\\
+    &\qquad + \alpha_\theta \sum_{k = 1}^{K} {\|\theta_k\|}^2_2 + \alpha_z \sum_{t = 1}^{m - 1} D_{\rm kl}(z(t), z(t + 1))\\
+    \text{subject to} & 0 \preceq z(t) \preceq \mathbf{1},\quad \mathbf{1}^T z(t) = 1,\quad t = 1, \ldots, m\\
+    & \theta_k \in {\cal C}_k,\quad k = 1, \ldots, K,
+    \end{array}
+    \]
+
+    where the optimization variables are $\theta_k \in \mathbf{R}^n$, $k = 1, \ldots, K$, and $z(t) \in \mathbf{R}^K$, $t = 1, \ldots, m$.
+    Note that the variable $z(t)$ is a soft assignment vector for the hidden state label $\hat{z}(t)$, where the $k$th entry of $z(t)$ indicates the probability of the state being $k$ at time step $t$; after solving the problem above, $\hat{z}(t)$ can be estimated as the index of the largest entry of $z(t)$.
+
+    Each component of this problem can be interpreted as follows:
+    The first term in the objective function is the negative log-likelihood of the observed data under the IO-HMM, given the state assignment probabilities $z(t)$, $t = 1, \ldots, m$, and the model parameters $\theta_k$, $k = 1, \ldots, K$.
+    The second term is a Tikhonov regularization on the model parameters $\theta_k$, with regularization parameter $\alpha_\theta > 0$.
+    The third term is a temporal smoothness regularization on the state assignment probabilities, where $D_{\rm kl}(p, q)$ denotes the Kullback-Leibler divergence between two probability distributions $p$ and $q$, and $\alpha_z > 0$ is the corresponding regularization parameter.
+    The constraints on the variables $z(t)$, $t = 1, \ldots, m$, ensure that they are valid probability distributions.
+    The sets ${\cal C}_k \subseteq \mathbf{R}^n$, $k = 1, \ldots, K$, are nonempty closed convex sets that encode potential prior knowledge about the model parameters $\theta_k$.
+    """)
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    ## Generate problem data
+
+    We consider the case $n = 2$, where the feature vector for each sample is generated according to
+
+    \[
+    x(t) \sim ({\cal U}(-5, 5),\ 1),
+    \]
+
+    where ${\cal U}(a, b)$ denotes a uniform distribution over the interval $[a, b]$, and the second entry of $x(t)$ is always $1$ to account for the bias term.
+    """)
+    return
+
+
 @app.cell
 def _(np):
     m = 1800
@@ -58,6 +128,22 @@ def _(np):
     return K, coefs, labels, m, n, p_tr, xs, ys


+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    ## Specify and solve the problem
+
+    To fully specify the biconvex problem, it is assumed that we are given the following prior knowledge about the coefficients:
+
+    \[
+    \theta_{1,1} \leq 0,\quad \theta_{2, 1} \geq 0,\quad \theta_{3, 1} \geq 0,\quad \theta_{2, 2} \geq \theta_{3, 2},
+    \]
+
+    where $\theta_{i, j}$ denotes the $j$th entry of the vector $\theta_i$.
+    """)
+    return
+
+
 @app.cell
 def _(BiconvexRelaxProblem, K, cp, m, n, xs, ys):
     thetas = cp.Variable((K, n))
@@ -87,6 +173,14 @@ def _(BiconvexRelaxProblem, K, cp, m, n, xs, ys):
     return thetas, zs


+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    ## Plot the results
+    """)
+    return
+
+
 @app.cell
 def _(K, coefs, labels, m, np, plt, thetas, zs):
     fig, axs = plt.subplots(1, 2, figsize=(8, 3), width_ratios=(1.2, 1))
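
One way to see why the first objective term above is convex in the $\theta_k$ for fixed assignments is to write it with CVXPY's `logistic` atom. The sketch below illustrates that $\theta$-step only, with the soft assignments $z(t)$ held fixed; the data is random, the regularization value is arbitrary, and only two of the prior constraints are included (in 0-indexed form). It is not the notebook's `BiconvexRelaxProblem` setup:

```python
import cvxpy as cp
import numpy as np

# Illustrative theta-step for the IO-HMM objective above, with the soft
# assignments z(t) held fixed; data and parameters here are hypothetical.
np.random.seed(0)
m, n, K, alpha_theta = 200, 2, 3, 1e-3
xs = np.column_stack([np.random.uniform(-5, 5, m), np.ones(m)])  # features with bias entry
ys = np.random.randint(0, 2, m).astype(float)                    # binary labels
Z = np.full((m, K), 1.0 / K)                                     # fixed soft assignments

thetas = cp.Variable((K, n))
logits = xs @ thetas.T                                  # (m, K): entry (t, k) is x(t)^T theta_k
loglik = cp.multiply(np.tile(ys[:, None], (1, K)), logits) - cp.logistic(logits)
obj = -cp.sum(cp.multiply(Z, loglik)) + alpha_theta * cp.sum_squares(thetas)
constr = [thetas[0, 0] <= 0, thetas[1, 0] >= 0]         # two of the prior constraints, 0-indexed
cp.Problem(cp.Minimize(obj), constr).solve()
```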

examples/kmeans.py

Lines changed: 48 additions & 0 deletions
@@ -39,6 +39,38 @@ def _():
     return BiconvexProblem, cp, make_blobs, mo, np, plt


+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    ## Introduction
+
+    Suppose we are given a set of data points $x_i \in \mathbf{R}^n$, $i = 1, \ldots, m$, and we would like to cluster them into $k$ groups using the $k$-means clustering method.
+    This corresponds to the following biconvex optimization problem:
+
+    \[
+    \begin{array}{ll}
+    \text{minimize} & \sum_{i = 1}^{m} z_i^T ({\|\bar{x}_1 - x_i\|}_2^2, \ldots, {\|\bar{x}_k - x_i\|}_2^2)\\
+    \text{subject to} & 0 \preceq z_i \preceq \mathbf{1},\quad \mathbf{1}^T z_i = 1,\quad i = 1, \ldots, m
+    \end{array}
+    \]
+
+    with variables $\bar{x}_i \in \mathbf{R}^n$, $i = 1, \ldots, k$, and $z_i \in \mathbf{R}^k$, $i = 1, \ldots, m$.
+
+    We can interpret the problem formulation as follows:
+    The variables $\bar{x}_1, \ldots, \bar{x}_k$ represent the cluster centroids, and each variable $z_i$ is a soft assignment vector for data point $x_i$, where the $j$th entry of $z_i$ indicates the probability of the sample $x_i$ belonging to cluster $j$.
+    The objective function is then the total within-cluster sum of squared distances, which we would like to minimize.
+    """)
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    ## Generate problem data
+    """)
+    return
+
+
 @app.cell
 def _(make_blobs):
     n = 2
@@ -49,6 +81,14 @@ def _(make_blobs):
     return k, m, n, xs


+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    ## Specify and solve the problem
+    """)
+    return
+
+
 @app.cell
 def _(BiconvexProblem, cp, k, m, n, xs):
     xbars = cp.Variable((k, n))
@@ -62,6 +102,14 @@ def _(BiconvexProblem, cp, k, m, n, xs):
     return xbars, zs


+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    ## Plot the results
+    """)
+    return
+
+
 @app.cell
 def _(np, plt, xbars, xs, zs):
     fig, axs = plt.subplots(1, 1, figsize=(3, 3))
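
The alternating structure of the formulation above is easy to see in plain NumPy: with the centroids fixed, the optimal $z_i$ puts all of its mass on the nearest centroid, and with the assignments fixed, each centroid is the mean of its assigned points. The sketch below is a minimal illustration with synthetic data and an arbitrary iteration count; it is not the notebook's `make_blobs`/`BiconvexProblem` code:

```python
import numpy as np

# Illustrative alternating minimization of the k-means objective above
# (Lloyd-style iterations); the data and settings are hypothetical.
rng = np.random.default_rng(0)
k, n = 3, 2
xs = np.concatenate([rng.normal(c, 0.5, (60, n)) for c in ((0, 0), (3, 0), (0, 3))])

xbars = xs[rng.choice(len(xs), k, replace=False)]  # initial centroids
for _ in range(10):
    # z-step: with centroids fixed, the optimal assignment concentrates on the
    # nearest centroid, so the soft assignment reduces to a hard one.
    dists = ((xs[:, None, :] - xbars[None, :, :]) ** 2).sum(axis=2)  # (m, k)
    zs = np.eye(k)[dists.argmin(axis=1)]
    # xbar-step: with assignments fixed, each centroid is the mean of its points.
    counts = np.maximum(zs.sum(axis=0), 1)  # guard against empty clusters
    xbars = (zs.T @ xs) / counts[:, None]
```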
