Bandit_book_solutions/Ch10.tex at master · shuaili8/Bandit_book_solutions · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
\chapter*{Chapter 10 The Upper Confidence Bound Algorithm: Bernoulli Noise}
\label{sec:tenth}

\noindent\textbf{10.1} (\textsc{Pinsker’s inequality}) Prove Lemma 10.2(b).

\noindent\textsc{Hint}\quad Consider the function $g(x)=d(p, p+x)-2 x^{2}$ over the $[-p, 1-p]$ interval.
By taking derivatives, show that $g \geq 0$.

\begin{proof}
    As the hint suggested, we validate the non-negativity of $g(x)$ over $[-p, 1-p]$.
    \begin{equation*}
        \begin{aligned}
            g^\prime(x)
            &= -\frac{p}{p+x} + \frac{1-p}{1-p-x} - 4x\\
            &= \frac{x}{(1-p-x)(p+x)} - 4x\\
            &\geq 0,
        \end{aligned}
    \end{equation*}
    which leads to $g(q-p) = d(p, q) - 2(p-q)^2 \geq 0$ and finally $d(p, q) \geq 2(p-q)^2$.
\end{proof}

\noindent\textbf{10.2} (\textsc{Asymptotic optimality}) Prove the asymptotic claim in Theorem 10.6.

\noindent\textsc{Hint}\quad Choose $\varepsilon_{1}, \varepsilon_{1}$ to decrease slowly with $n$ and use the first part of the theorem.

\begin{proof}
    If we choose $\varepsilon_{1}, \varepsilon_{1}$ to decrease slowly with $n$ such that

    \begin{enumerate}
        \item $\lim_{n \to \infty} \varepsilon_{1} = \lim_{n \to \infty} \varepsilon_{1} = 0$;
        \item $\lim_{n \to \infty} \varepsilon_{1}^2 \log n = \lim_{n \to \infty} \varepsilon_{2}^2 \log n = \infty$.
    \end{enumerate}

    If so, we can derive that
    \begin{equation*}
        \begin{aligned}
            \limsup _{n \to \infty} \frac{R_{n}}{\log (n)}
            &\leq \lim _{n \to \infty} \sum_{i: \Delta_i > 0} \frac{\Delta_i}{d(\mu, \mu^*)} \frac{\log(1+n\log^2 n)}{\log n}\\
            &\leq \lim _{n \to \infty} \sum_{i: \Delta_i > 0} \frac{\Delta_i}{d(\mu, \mu^*)} \frac{\log(\log^2 n+n\log^2 n)}{\log n}\\
            &= \lim _{n \to \infty} \sum_{i: \Delta_i > 0} \frac{\Delta_i}{d(\mu, \mu^*)} \frac{\log(n+1) + 2\log(\log n)}{\log n}\\
            &= \sum_{i: \Delta_i > 0} \frac{\Delta_i}{d(\mu, \mu^*)},
        \end{aligned}
    \end{equation*}
    which leads to the asymptotic claim in Theorem 10.6.
\end{proof}

\noindent\textbf{10.3} (\textsc{Concentration for bounded random variables}) Let $\mathbb{F}=\left(\mathcal{F}_{t}\right)_{t}$ be a filtration,
$\left(X_{t}\right)_{t}$ be $[0,1]$-valued, $\mathbb{F}$-adapted sequence,
such that $\mathbb{E}\left[X_{t} \mid \mathcal{F}_{t-1}\right]=\mu_{t}$ for some $\mu_{1}, \ldots, \mu_{n} \in[0,1]$ non-random numbers.
Define $\mu=\frac{1}{n} \sum_{t=1}^{n} \mu_{t}$, $\hat{\mu}=\frac{1}{n} \sum_{t=1}^{n} X_{t}$.
Prove that the conclusion of Lemma 10.3 still holds, i.e.,
\begin{align*}
   & \PP{ \hat{\mu}\ge \mu+ \varepsilon} \le \exp\bracket{-nd(\mu+\varepsilon,\mu)} \text{ for $\varepsilon\in[0,1-\mu]$ } \,; \\
    &\PP{\hat{\mu}\le \mu- \varepsilon} \le \exp\bracket{-nd(\mu-\varepsilon,\mu)} \text{ for $\varepsilon\in[0,\mu]$ } \,.
\end{align*}
~\\
\noindent\textsc{Hint}\quad Read Note 2 at the end of this chapter.
Let $g(\cdot, \mu)$ be the cumulant-generating function of the $\mu$-parameter Bernoulli distribution.
For $X \sim \mathcal{B}(\mu)$, $\lambda \in \mathbb{R}$, $g(\lambda, \mu)=\log \mathbb{E}[\exp (\lambda X)]$.
Show that $g(\lambda, \cdot)$ is concave.
Next, use this and the tower rule to show that $\mathbb{E}[\exp (\lambda n(\hat{\mu}-\mu))] \leq g(\lambda, \mu)^{n}$.

\begin{proof}
    % We first follow the same procedure
    % \begin{equation*}
    %     \begin{aligned}
    %         \mathbb{P}(\hat{\mu} \geq \mu+\varepsilon)
    %         &= \mathbb{P}(\hat{\mu} - \mu \geq \varepsilon)\\
    %         &= \mathbb{P}\bracket{\exp(\lambda \sum_{t=1}^n (X_t - \mu)) \geq \exp(\lambda n \varepsilon)}\\
    %         &\leq \frac{\EE{\exp(\lambda \sum_{t=1}^n (X_t - \mu))}}{\exp(\lambda n \varepsilon)},
    %     \end{aligned}
    % \end{equation*}
    % where the inequality holds according to Markov's inequality.

    According to the Hint, let $g(\lambda, \mu) = \log \EE{\exp\bracket{\lambda X}}$ and we would show that $g(\lambda, \cdot)$ is concave in the following.
    Based on the definition, we can compute that $g(\lambda, \mu) = \log(\mu \exp(\lambda) + (1 - \mu)) - \lambda \mu$,
    which leads to $\frac{d^2}{d \mu^2} g(\lambda, \mu) = \frac{-(\exp(\lambda) - 1)^2}{(\mu \exp(\lambda) + 1 - \mu)^2} \leq 0$.

    Let $S_p = \sum_{t=1}^p (X_t - \mu_t)$, it holds that
    \begin{equation*}
        \begin{aligned}
            \mathbb{E}[\exp(\lambda S_p)]
            &= \mathbb{E}[\mathbb{E}[\exp(\lambda S_p) | \mathcal{F}_{p-1}]]\\
            &= \mathbb{E}[\mathbb{E}[\exp(\lambda S_{p-1}) \exp(\lambda(X_p - \mu_p)) | \mathcal{F}_{p-1}]]\\
            &= \mathbb{E}[\exp(\lambda S_{p-1}) \mathbb{E}[\exp(\lambda(X_p - \mu_p)) | \mathcal{F}_{p-1}]]\\
            &= \EE{\exp(\lambda S_{p-1}) \EE{ \frac{\exp(\lambda X_p)}{\exp(\lambda \mu_p)} | \mathcal{F}_{p-1} }}\\
            &\leq \EE{\exp(\lambda S_{p-1}) \EE{\frac{X_p (\exp(\lambda) - 1) + 1}{\exp(\lambda \mu_p)} | \mathcal{F}_{p-1}}}\\
            &= \EE{\exp(\lambda S_{p-1}) \frac{\mu_p(\exp(\lambda) - 1) + 1}{\exp(\lambda \mu_p)}}\\
            &= \mathbb{E}[\exp(\lambda S_{p-1})] \exp(g(\lambda, \mu_p))\,,
        \end{aligned}
    \end{equation*}
    where the inequality follows from noticing that $f(x) = \exp(\lambda x) - x(\exp(\lambda) - 1) - 1 \leq 0$ on $[0, 1]$, as suggested in Note 2.

    The above conclusion leads to $\mathbb{E}[\exp(\lambda S_{n})] \leq \exp\bracket{\sum_{t=1}^n g(\lambda, \mu_t)} \leq \exp(n g(\lambda, \mu))$.
    Hence, we have
    \begin{equation*}
        \begin{aligned}
            \mathbb{P}(\hat{\mu} \geq \mu+\varepsilon)
            &\leq \frac{\mathbb{E}[\exp(\lambda \sum_{t=1}^n (X_t - \mu))]}{\exp(\lambda n \varepsilon)}\\
            &\leq \frac{\exp(n g(\lambda, \mu))}{\exp(\lambda n \varepsilon)}\\
            &= \bracket{\frac{\exp(g(\lambda, \mu))}{\exp(\lambda \varepsilon)}}^2\\
            &= (\mu \exp(\lambda(1 - \mu - \varepsilon)) + (1 - \mu) \exp(-\lambda(\mu + \varepsilon)))^n \,,
        \end{aligned}
    \end{equation*}
    which can be followed by the same procedures as in the proof of Lemma 10.3, and the first inequality is due to Markov inequality that $\PP{\hat{\mu} \geq \mu+\varepsilon}= \PP{\hat{\mu} - \mu \geq \varepsilon} = \PP{\exp(\lambda \sum_{t=1}^n (X_t - \mu)) \geq \exp(\lambda n \varepsilon)} \le \frac{\EE{\exp(\lambda \sum_{t=1}^n (X_t - \mu))}}{\exp(\lambda n \varepsilon)}$.
\end{proof}