% (removed: web-scrape navigation text and line-number gutter artifacts that are not part of the LaTeX source)
%!TEX root = main.tex
% for theorem etc ************************************************
% \newtheorem{theorem}{Theorem}[section]
% \newtheorem{lemma}[theorem]{Lemma}
% \newtheorem{proposition}[theorem]{Proposition}
% \newtheorem{corollary}[theorem]{Corollary}
% \newtheorem{definition}{Definition}[section]
% \newtheorem{remark}{Remark}[section]
% \newtheorem{example}{Example}[section]
% \newenvironment{solution}{\begin{proof}[Solution]}{\end{proof}}
% ************************************************
\chapter*{Chapter 2 Foundations of Probability}
\label{sec:second}
% \kant[7-11] % Dummy text
% \begin{theorem}[{\cite[95]{AM69}}]
% \label{thm:dedekind}
% Let \( A \) be a Noetherian domain of dimension one. Then the following are equivalent:
% \begin{enumerate}
% \item \( A \) is integrally closed;
% \item Every primary ideal in \( A \) is a prime power;
% \item Every local ring \( A_\mathfrak{p} \) \( (\mathfrak{p} \neq 0) \) is a discrete valuation ring.
% \end{enumerate}
% \end{theorem}
\noindent\textbf{2.1} (\textsc{Composing random elements}) Show that if $f$ is $\mathcal{F}/\mathcal{G}$-measurable and $g$
is $\mathcal{G}/\mathcal{H}$-measurable for sigma algebras $\mathcal{F}$,$\mathcal{G}$ and $\mathcal{H}$ over appropriate spaces, then
their composition, $g \circ f$ (defined the usual way: $( g \circ f )(\omega) = g(f(\omega)), \omega \in \Omega)$, is
$\mathcal{F}/\mathcal{H}$-measurable.
\begin{proof}
Let $C \in \mathcal{H}$. Since $g$ is $\mathcal{G}/\mathcal{H}$-measurable, $g^{-1}(C)\in \mathcal{G}$; since $f$ is $\mathcal{F}/\mathcal{G}$-measurable, $f^{-1}(g^{-1}(C))\in \mathcal{F}$. As $(g\circ f)^{-1}(C)=f^{-1}(g^{-1}(C))$, we conclude that $g \circ f$ is $\mathcal{F}/\mathcal{H}$-measurable.
\end{proof}
% \subsubsection{}
\noindent\textbf{2.2}
Let $X_1,\dots,X_n$ be random variables on $(\Omega, \mathcal{F})$. Prove that $X=( X_1 ,\dots,X_n )$ is a random vector.
\begin{proof}
Since each $X_i$ ($i=1,2,...,n$) is a random variable, it is $\cF/\cB(\RR)$-measurable, which means that $\forall B \in \cB(\RR)$, $X_i^{-1}(B) \in \cF$.
We first prove that $X$ is $\cF/(\cB(\RR) \times \cB(\RR)\times\cdots\times \cB(\RR) )$-measurable (totally $n$ $\cB(\RR)$s). $\forall A=A_1\times A_2 \times \cdots \times A_n \in \cB(\RR) \times \cB(\RR)\times\cdots\times \cB(\RR)$, $X^{-1}(A) =X_1^{-1}(A_1) \cap X_2^{-1}(A_2) \cap \cdots \cap X_n^{-1}(A_n) \in \cF$, which holds since $X_i^{-1}(A_i) \in \cF, \forall i =1,2,...,n$ and $\cF$ is a $\sigma$-algebra. Thus we conclude that $X$ is $\cF/(\cB(\RR) \times \cB(\RR)\times\cdots\times \cB(\RR) )$-measurable.
By definition $\cB(\RR^n) = \sigma (\cB(\RR) \times \cB(\RR)\times\cdots\times \cB(\RR))$ (totally $n$ $\cB(\RR)$s). According to the property in 2.5(b), $X$ is therefore $\mathcal{F}/\mathcal{B}(\RR^n)$-measurable, thus it is a random vector.
% We claim that $X=(X_1,X_2,...,X_n)$ is $\mathcal{F}/\mathcal{B}(\mathbb{R}^n)$ measurable. Define $a=(a_1,a_2,...,a_n)$ $b=(b_1,b_2,...,b_n)$ with $a,b\in \mathbb{R}^n$ where $a<b$. Since $X_1,X_2,...,X_n$ is $\mathcal{F}/\mathcal{B}(\mathbb{R})$ measurable, therefore $\exists A_1=X^{-1}_1((a_1,b_1)),A_2=X^{-1}_2((a_2,b_2)),...,A_n=X^{-1}_n((a_n,b_n))\in \mathcal{F}$. Let $A=A_1\cap A_2 \cap...\cap A_n=\bigcap\limits^{n}_{i=1}A_i$. It follows that $X^{-1}((a,b))=\bigcap\limits^{n}_{i=1}((a,b))=A\in \mathcal{F}$. Therefore $X$ is $\mathcal{F}/\mathcal{B}(\mathbb{R}^n)$ measurable and $X$ is random vector. \\
\end{proof}
% \subsubsection{}
\noindent\textbf{2.3}
(\textsc{Random variable induced $\sigma$-algebra}) Let $\mathcal{U}$ be an arbitrary set and
$( \mathcal{V}, \Sigma)$ a measurable space and $X : \mathcal{U} \rightarrow \mathcal{V}$ an arbitrary function. Show that
$\Sigma_X = \{X ^{-1} (A) : A \in \Sigma\}$ is a $\sigma$-algebra over $\mathcal{U}$.
\begin{proof}
\begin{enumerate}
\item[(i)] We need to show that $\Sigma_X$ is closed under countable union. Let $U_i=X^{-1}(A_i),A_i\in \Sigma, i\in \mathbb{N}$. It follows that $\bigcup\limits^{\infty}_{i=1}U_i=\bigcup\limits^{\infty}_{i=1}X^{-1}(A_i)=X^{-1}(\bigcup\limits^{\infty}_{i=1}A_i)$. Since $\bigcup\limits^{\infty}_{i=1}A_i\in \Sigma$, $\bigcup\limits^{\infty}_{i=1}U_i\in \Sigma_X$.
\item[(ii)] We need to show that $\Sigma_X$ is closed under set subtraction $-$. $\forall U_1,U_2\in \Sigma_X$,$U_1-U_2=X^{-1}(A_1)-X^{-1}(A_2)=X^{-1}(A_1-A_2)$. Since $A_1-A_2\in \Sigma$, $U_1-U_2\in \Sigma_X$.
\item[(iii)] We need to show that $\Sigma_X$ contains $\mathcal{U}$ itself. Since $\mathcal{U}=X^{-1}(\mathcal{V})$ and $\mathcal{V}\in \Sigma$, it follows that $\mathcal{U}\in \Sigma_X$.
\end{enumerate}
\end{proof}
\noindent\textbf{2.4}
Let $(\Omega,\mathcal{F})$ be a measurable space and $A \subseteq \Omega$ and $\mathcal{F}_{|A} = \{A \cap B: B \in \mathcal{F}\}$.
\begin{proof}
\begin{enumerate}
\item[(a)] \begin{enumerate}
\item[(i)] We need to show that $\mathcal{F}|_A$ is closed under countable union. Let $X_1=A\cap B_1,X_2=A\cap B_2,...$ and $X^{\prime}=\bigcup\limits^{\infty}_{i=1}X_i$ and $B^{\prime}=\bigcup\limits^{\infty}_{i=1}B_i$ where $B_1,B_2,...\in \mathcal{F}$. Since $\mathcal{F}$ is sigma algebra, $B^{\prime}\in \mathcal{F}$. Furthermore, since $X^{\prime}=\bigcup^{\infty}_{i=1}X_i=\bigcup^{\infty}_{i=1}A\bigcap B_i=A\bigcap\left(\bigcup\limits^{\infty}_{i=1}B_i \right)=A\bigcap B^{\prime}$, we can see that $X^{\prime}\in \mathcal{F}|_A$.
\item[(ii)] We need to show that $\mathcal{F}|_A$ is closed under set subtraction $-$. $\forall X_1,X_2\in \mathcal{F}|_A$, $X_1-X_2=(A\bigcap B_1)-(A\bigcap B_2)=A\bigcap(B_1-B_2)$. Since $B_1-B_2\in \mathcal{F}$, it follows that $X_1-X_2\in \mathcal{F}|_A$.
\item[(iii)] We need to show that $\mathcal{F}|_A$ contains $A$ itself. Since $\varnothing \in \mathcal{F}$, we have $\varnothing=A\bigcap\varnothing\in \mathcal{F}|_A$; and since $\Omega\in \mathcal{F}$, we have $A=A\bigcap\Omega\in \mathcal{F}|_A$.
\end{enumerate}
\item[(b)] Let $P=\{ A\bigcap B:B\in \mathcal{F} \}, Q=\{ B: B\subset A, B\in \mathcal{F} \}$.
\begin{enumerate}
\item[(i)] We claim that $P\subset Q$. Let $X=A\bigcap B, B\in \mathcal{F}$. Since $A\in \mathcal{F}$, $X=A\bigcap B\in \mathcal{F}$. Furthermore, $X\in Q=\{B:B\subset A, B\in \mathcal{F} \}$.
\item[(ii)] We claim that $Q\subset P$. $\forall X\in Q$, we have $X\subset A$ and $X\in \mathcal{F}$, which means that $X=X\bigcap A$ and $X\in \mathcal{F}$. It follows that $X\in P$.
\item[(iii)] Take both (i)(ii) into consideration, we can see that $P=Q$.
\end{enumerate}
\end{enumerate}
\end{proof}
\noindent\textbf{2.5}
Let $\mathcal{G}\subseteq 2^{\Omega}$ be a non-empty collection of sets and define $\sigma(\mathcal{G})$ as the smallest
$\sigma$-algebra that contains $\mathcal{G}$. By `smallest' we mean that $\mathcal{F}\in 2^{\Omega}$ is smaller than
$\mathcal{F}^{\prime}\in 2^{\Omega}$ if $\mathcal{F}\subset \mathcal{F}^{\prime}$.
\begin{enumerate}
\item[(a)] Show that $\sigma(\mathcal{G})$ exists and contains exactly those sets $A$ that are in every
$\sigma$-algebra that contains $\mathcal{G}$.
\item[(b)] Suppose $(\Omega^{\prime},\mathcal{F})$ is a measurable space and $X:\Omega^{\prime}\rightarrow \Omega$ be $\mathcal{F}/\mathcal{G}$-measurable. Show that X is also $\mathcal{F}/\sigma(\mathcal{G})$-measurable. (We often use this result to simplify the job of checking whether a random variable satisfies some measurability property).
\item[(c)] Prove that if $A\in \mathcal{F}$ where $\mathcal{F}$ is a $\sigma$-algebra, then $\mathbb{I}\{A\}$ is $\mathcal{F}$-measurable.
\end{enumerate}
\begin{proof}
\begin{enumerate}
\item[(a)] Let $\mathcal{K} = \{\mathcal{F} | \mathcal{F} \mbox{ is a }\sigma\mbox{-algebra and contains } \mathcal{G}\}$. The collection $\mathcal{K}$ is non-empty, since it contains the power set $2^{\Omega}$.
Then $\bigcap_{\mathcal{F} \in \mathcal{K}}\mathcal{F}$ contains exactly those sets that are in every $\sigma$-algebra that contains $\mathcal{G}$. Given its existence, we only need to prove that $\bigcap_{\mathcal{F} \in \mathcal{K}}\mathcal{F}$ is the smallest $\sigma$-algebra that contains $\mathcal{G}$.
First we show $\bigcap_{\mathcal{F} \in \mathcal{K}}\mathcal{F}$ is a $\sigma$-algebra. Since every $\mathcal{F} \in \mathcal{K}$ is a $\sigma$-algebra and therefore $\Omega \in \mathcal{F}$ for all $\mathcal{F} \in \mathcal{K}$, it follows that $\Omega \in \bigcap_{\mathcal{F} \in \mathcal{K}}\mathcal{F}$. Next, for any $A \in \bigcap_{\mathcal{F} \in \mathcal{K}}\mathcal{F}$, we have $A \in \mathcal{F}$ for all $\mathcal{F} \in \mathcal{K}$. Since they are all $\sigma$-algebras, $A^c \in \mathcal{F}$ for all $\mathcal{F} \in \mathcal{K}$. Hence $A^c \in \bigcap_{\mathcal{F} \in \mathcal{K}}\mathcal{F}$. Finally, for any $\{A_i\}_i \subset \bigcap_{\mathcal{F} \in \mathcal{K}}\mathcal{F}$, $\{A_i\}_i \subset \mathcal{F}$ for all $\mathcal{F} \in \mathcal{K}$. Since they are all $\sigma$-algebras, $\bigcup_i A_i \in \mathcal{F}$ for all $\mathcal{F} \in \mathcal{K}$. Hence $\bigcup_i A_i \in \bigcap_{\mathcal{F} \in \mathcal{K}}\mathcal{F}$.
Next we want to prove $\bigcap_{\mathcal{F} \in \mathcal{K}}\mathcal{F}$ is the smallest $\sigma$-algebra that contains $\cG$. It is quite obvious that $\bigcap_{\mathcal{F} \in \mathcal{K}}\mathcal{F} \subseteq \mathcal{F}'$ for all $\mathcal{F}' \in \mathcal{K}$.
Above all, we have $\sigma(\cG)=\bigcap_{\mathcal{F} \in \mathcal{K}}\mathcal{F}$.
\item[(b)] Define $\mathcal{H}=\left\{A: X^{-1}(A) \in \mathcal{F}\right\}$.
To show $X$ is $\cF/\sigma(\cG)$-measurable, it is sufficient to prove $\sigma(\mathcal{G}) \subseteq \mathcal{H}$.
First we prove that $\mathcal{H}$ is a $\sigma$-algebra.
It holds that $\Omega \in \mathcal{H}$ since $X^{-1}(\Omega) = \Omega' \in \cF$.
For any $A \in \mathcal{H}$, we have $X^{-1}(A) \in \mathcal{F}$, thus $X^{-1}(A^c) = X^{-1}(A)^c \in \mathcal{F}$, which holds since $\cF$ is a $\sigma$-algebra. Thus $A^c \in \mathcal{H}$. For any $A_i \in \cH$, $i=1,2,...$, we have $X^{-1}(A_i) \in \mathcal{F}$, so $X^{-1}(\cup_i A_i) = \cup_i X^{-1}(A_i) \in \mathcal{F}$. We can then conclude $\cup_i A_i \in \cH$ and $\cH$ is a $\sigma$-algebra.
Also, since $X$ is $\cF/\cG$-measurable, we have $\mathcal{G} \subseteq \mathcal{H}$.
Thus $\mathcal{H}$ is $\sigma$-algebra that contains $\mathcal{G}$.
By applying the result of (a), we have $\sigma(\mathcal{G}) \subseteq \mathcal{H}$, which completes the proof.
\item[(c)] The idea is to show $\forall B \in \mathfrak{B}(\mathbb{R})$, $\mathbb{I}\{A\}^{-1}(B) \in \mathcal{F}$.
If $0 \in B$ and $1 \in B$, $\mathbb{I}\{A\}^{-1}(B)=\Omega \in \mathcal{F}$. If $0 \in B$ and $1 \notin B$, $\mathbb{I}\{A\}^{-1}(B)=A^c \in \mathcal{F}$. If $1 \in B$ and $0 \notin B$, $\mathbb{I}\{A\}^{-1}(B)=A \in \mathcal{F}$. If $\{0,1\} \cap B = \emptyset$, $\mathbb{I}\{A\}^{-1}(B)=\emptyset \in \mathcal{F}$.
\end{enumerate}
\end{proof}
\noindent\textbf{2.6}
(\textsc{Knowledge and $\sigma$-algebras: a pathological example}) In the context
of Lemma 2.5, show an example where $Y = X$ and yet $Y$ is not $\sigma(X)$ measurable. \\
\noindent\textsc{Hint}\quad As suggested after the lemma, this can be arranged by choosing $\Omega= \mathcal{Y} = \mathcal{X} = \mathbb{R}, X(\omega) = Y(\omega) = \omega, \mathcal{F} = \mathcal{H} = \mathfrak{B}(\mathbb{R})$
and $\mathcal{G} = \{\emptyset,\mathbb{R}\}$ to be the trivial $\sigma$-algebra.
\begin{proof}
As the hint suggests, let $\Omega= \mathcal{Y} = \mathcal{X} = \mathbb{R}, X(\omega) = Y(\omega) = \omega, \mathcal{F} = \mathcal{H} = \mathfrak{B}(\mathbb{R})$ and $\mathcal{G} = \{\emptyset,\mathbb{R}\}$.
In this case, $\sigma(X) = \{{X}^{-1}(A): A \in \mathcal{G}\} = \{\emptyset, \mathbb{R}\}$, we can find that $Y^{-1}((0, 1))=(0, 1) \notin \sigma(X)$, thus $Y$ is not $\sigma(X)$-measurable.
\end{proof}
\noindent\textbf{2.7}
Let $(\Omega,\mathcal{F},\mathbb{P})$ be a probability space, $B\in\mathcal{F}$ be such that $\mathbb{P}(B) > 0$. Prove that $A \mapsto \mathbb{P}(A|B)$ is a probability measure over $(\Omega,\mathcal{F})$.
\begin{proof}
First we have $\mathbb{P}(\Omega \mid B) = \frac{\mathbb{P}(\Omega \cap B)}{\mathbb{P}(B)} = \frac{\mathbb{P}(B)}{\mathbb{P}(B)} = 1$.
Then, for any $A \in \mathcal{F}$, $\mathbb{P}(A \mid B) = \frac{\mathbb{P}(A \cap B)}{\mathbb{P}(B)} \geq 0$.
Next, for any $A \in \mathcal{F}$, $\mathbb{P}(A^c \mid B) = \frac{\mathbb{P}(A^c \cap B)}{\mathbb{P}(B)} = \frac{\mathbb{P}((\Omega - A) \cap B)}{\mathbb{P}(B)} = \frac{\mathbb{P}(B) - \mathbb{P}(A \cap B)}{\mathbb{P}(B)} = 1 - \mathbb{P}(A \mid B)$.
Finally, for all countable collections of disjoint sets $\{A_i\}_i$ with $A_i \in \mathcal{F}$ for all $i$, we have $\mathbb{P}\left(\bigcup_{i} A_{i} \mid B\right) = \frac{\mathbb{P}((\bigcup_{i} A_{i}) \cap B)}{\mathbb{P}(B)} = \frac{\mathbb{P}(\bigcup_{i} (A_{i} \cap B))}{\mathbb{P}(B)} = \sum_{i} \frac{\mathbb{P}(A_{i} \cap B)}{\mathbb{P}(B)} = \sum_{i} \mathbb{P}(A_i \mid B)$. \\
\end{proof}
\noindent\textbf{2.8}
\textsc{(Bayes law)} Verify (2.2).
\begin{proof}
With the definition of conditional probability, we have $\mathbb{P}(A \mid B) = \frac{\mathbb{P}(A \cap B)}{\mathbb{P}(B)} = \frac{\mathbb{P}(B \mid A) \mathbb{P}(A)}{\mathbb{P}(B)}$. \\
\end{proof}
\noindent\textbf{2.9}
Consider the standard probability space $(\Omega,\mathcal{F},\mathbb{P})$ generated by two standard, unbiased, six-sided dice that are thrown independently of each other. Thus,
$\Omega=\{1, . . . , 6\}^2$, $\mathcal{F} = 2^{\Omega}$ and $\mathbb{P}(A) = |A|/6^2$ for any $A\in\mathcal{F}$ so that $X_i(\omega)=\omega_i$ represents the outcome of throwing dice $i\in\{1,2\}$.
\begin{enumerate}
\item[(a)] Show that the events `$X_1 < 2$' and `$X_2$ is even' are independent of each other.
\item[(b)] More generally, show that any two events $A\in \sigma(X_1)$ and $B\in \sigma(X_2)$ are independent of each other.
\end{enumerate}
\begin{proof}
\begin{enumerate}
\item[(a)]
The event $\set{X_1 < 2} = \set{1}\times \set{1,2,3,4,5,6}$, $\set{X_2 \text{ is even }} = \set{1,2,3,4,5,6} \times \set{2,4,6}$,\\ $\set{X_1 < 2, X_2 \text{ is even }} = \set{(1,2),(1,4),(1,6)}$.
Thus $\PP{X_1 < 2} = \frac{6}{36} = \frac{1}{6}$, $\PP{X_2 \text{ is even }} = \frac{18}{36} = \frac{1}{2}$, $\PP{X_1 < 2, X_2 \text{ is even }} = \frac{3}{36} = \frac{1}{12}$, which satisfies $\PP{X_1 < 2, X_2 \text{ is even }} = \PP{X_1 < 2}\times \PP{X_2 \text{ is even }}$. These two events are independent of each other.
\item[(b)] $\sigma(X_1) = \set{X_1^{-1}(A'),A'\subseteq [6]} = \set{A'\times [6]:A' \subseteq [6]}$, $\sigma(X_2) = \set{X_2^{-1}(B'),B'\subseteq [6]} = \set{[6]\times B':B' \subseteq [6]}$. Thus $\forall A \in \sigma(X_1), B \in \sigma(X_2)$, $\PP{A} = \frac{\abs{A'}\times 6}{36} = \frac{\abs{A'}}{6}$, $\PP{B} = \frac{6\times \abs{B'}}{36} = \frac{\abs{B'}}{6}$ and $\PP{A \cap B} = \frac{\abs{A'}\times \abs{B'}}{36} =\PP{A} \times \PP{B}$. So $A$ and $B$ are independent of each other.
\end{enumerate}
\end{proof}
\noindent\textbf{2.10}
(\textsc{Serendipitous independence}) The point of this exercise is to understand independence more deeply. Solve the following problems:
\begin{enumerate}
\item[(a)] Let $(\Omega,\mathcal{F},\mathbb{P})$ be a probability space.
Show that $\emptyset$ and $\Omega$ (which are events) are independent of any other event. What is the intuitive meaning of this?
\item[(b)] Continuing the previous part, show that any event $A\in\mathcal{F}$ with $\mathbb{P}(A)\in \{0, 1\}$ is independent of any other event.
\item[(c)] What can we conclude about an event $A \in\mathcal{F}$ that is independent of its complement,
$A^c =\Omega\setminus A$? Does your conclusion make intuitive sense?
\item[(d)] What can we conclude about an event $A\in\mathcal{F}$ that is independent of itself?
Does your conclusion make intuitive sense?
\item[(e)] Consider the probability space generated by two independent flips of unbiased
coins with the smallest possible $\sigma$-algebra. Enumerate all pairs of events
$A,B$ such that $A$ and $B$ are independent of each other.
\item[(f)] Consider the probability space generated by the independent rolls of two
unbiased three-sided dice. Call the possible outcomes of the individual dice
rolls $1$, $2$ and $3$. Let $X_i$ be the random variable that corresponds to the
outcome of the $i$th dice roll ($i\in \{1, 2\}$). Show that the events $\{X_1 \leq 2\}$ and
$\{X_1 = X_2\}$ are independent of each other.
\item[(g)] The probability space of the previous example is an example when the
probability measure is uniform on a finite outcome space (which happens to
have a product structure). Now consider any $n$-element, finite outcome space
with the uniform measure. Show that $A$ and $B$ are independent of each other
if and only if the cardinalities $|A|$, $|B|$, $|A \cap B|$ satisfy $n|A \cap B|=|A| \cdot |B|$.
\item[(h)] Continuing with the previous problem, show that if $n$ is prime, then no
non-trivial events are independent (an event $A$ is \textbf{trivial} if $\mathbb{P} (A) \in \{0, 1\}$).
\item[(i)] Construct an example showing that pairwise independence does not imply
mutual independence.
\item[(j)] Is it true or not that $A,B,C$ are mutually independent if and only if
$\mathbb{P}(A \cap B \cap C) = \mathbb{P}(A) \mathbb{P}(B) \mathbb{P}(C)$? Prove your claim.
\end{enumerate}
\begin{proof}
\begin{enumerate}
\item[(a)]The empty set and the whole set $\Omega$ are independent of any event:
$$P(A\cap\Omega)=P(A)=1\times P(A)=P(\Omega)\times P(A)$$
$$P(A\cap\emptyset)=P(\emptyset)=0=P(\emptyset)\times P(A)$$
Intuitively, knowing that $\Omega$ occurred (or that $\emptyset$ did not) conveys no information about any other event.
\item[(b)]
% Prove when $P(A)=0$ or $1$, $A$ is independent of any event:\\
For any $B\in\mathcal{F}$ and $P(A)\in\{0,1\}$:\\
when $P(A)=1$, $P(A^c \cap B)\leq P(A^c) = 1-P(A) =0$, so we have $P(A \cap B) = P(A \cap B) + P(A^c \cap B) = P(B) = P(A)P(B)$; when $P(A)=0$, we have $P(A \cap B)\leq P(A) =0=P(A)P(B)$.
\item[(c)]$P(A^c \cap A) = P(A)P(A^c)$,
we have $0=P(A)(1-P(A))\Rightarrow P(A)\in\{0,1\}$
\item[(d)]$P(A \cap A) = P(A)P(A)$, hence $P(A)=P(A)^2$ and $P(A)\in\set{0,1}$
\item[(e)]$\Omega=\{(1,1),(1,0),(0,1),(0,0)\}. A,B \subseteq \Omega$ denote the events.
First of all, if either $A$ or $B$ is trivial, then $A$ and $B$ are independent of each other.
Then, we only need to enumerate $A,B\notin \{\Omega, \emptyset\}$ satisfying $P(A\bigcap B) = P(A)P(B)$.
Since $P(A\bigcap B) = \frac{|A\bigcap B|}{|\Omega|} = \frac{|A \bigcap B|}{4}$ and $P(A)P(B) = \frac{|A||B|}{16}$, we can conclude that $|A| = 2$, $|B| = 2$ and $|A\bigcap B| = 1$ is the only situation satisfying the condition.
Thus, besides trivial $A$ or $B$, all $A, B$ satisfying $|A| = 2$, $|B| = 2$ and $|A\bigcap B| = 1$ are the solutions.
\iffalse
Just verify that each case is independent :$P(A=1,B=1)=\frac{1}{4}=\frac{1}{2} *\frac{1}{2}=P(A)P(B)$
$P(A=1,B=0)=\frac{1}{4}=\frac{1}{2} *\frac{1}{2}=P(A)P(B)$
$P(A=0,B=1)=\frac{1}{4}=\frac{1}{2} *\frac{1}{2}=P(A)P(B)$
$P(A=0,B=0)=\frac{1}{4}=\frac{1}{2} *\frac{1}{2}=P(A)P(B)$
\fi
\item[(f)]$P(X_1 \leq 2)=2/3$
$P(X_1 = X_2)=3/9=1/3$
$P(X_1 \leq 2,X_1 = X_2)=P(X_1 = X_2=1)+P(X_1 = X_2=2)=1/9+1/9=2/9$
So, $P(X_1 \leq 2,X_1 = X_2)=P(X_1 = X_2)P(X_1 \leq 2)$
\item[(g)]Necessity :$\frac{|A\bigcap B|}{n}=P(A \bigcap B)=P(A)P(B)=\frac{|A|}{n}\frac{|B|}{n}$
$\Rightarrow |A\bigcap B|\times n = |A||B|$
Sufficiency :$|A\bigcap B|\times n = |A||B|\Rightarrow \frac{|A|}{n}\frac{|B|}{n}=\frac{|A\bigcap B|}{n}$
$\Rightarrow P(A \bigcap B)=P(A)P(B)$
\item[(h)] Suppose $A,B$ are two non-trivial events independent of each other. Then $|A\bigcap B|\times n = |A||B|$, so $n \mid |A||B|$. Since $n$ is prime, $n \mid |A|$ or $n \mid |B|$, i.e.\ $|A| = n$ or $|B| = n$, contradicting the non-triviality assumption.
\item[(i)] Let $\Omega = \{1,2,3,4\},\ A = \{1,2\},\ B = \{1,3\},\ C = \{1,4\}$. $A,\ B,\ C$ are pairwise independent but $P(A\bigcap B \bigcap C) = \frac{1}{4} \neq P(A) P(B)P(C) = \frac{1}{8}$.
\item[(j)]No. Consider rolling a fair die and set $A=\{1,2,3\}$, $B=\{1,2,4\}$, $C=\{1,4,5,6\}$. Then $P(A \cap B \cap C)=\frac{1}{6} = \frac{1}{2}\cdot\frac{1}{2}\cdot\frac{2}{3} = P(A)P(B)P(C)$; however, $P(A\cap B)=\frac{1}{3} \neq \frac{1}{2}\cdot\frac{1}{2} = P(A) P(B)$. Thus $P(A \cap B \cap C) = P(A)P(B)P(C)$ does not imply mutual independence.
\end{enumerate}
\end{proof}
\noindent\textbf{2.11}
(\textsc{Independence and random elements}) Solve the following problems:
\begin{enumerate}
\item[(a)] Let $X$ be a constant random element (that is, $X(\omega) = x$ for any $\omega \in \Omega$ over the outcome space over which $X$ is defined). Show that $X$ is independent of any other random variable.
\item[(b)] Show that the above continues to hold if $X$ is almost surely constant (that is, $\PP{X=x}=1$ for an appropriate value $x$).
\item[(c)] Show that two events are independent if and only if their indicator random variables are independent (that is, $A, B$ are independent if and only if $X(\omega) = \bOne{\omega \in A}$ and $Y(\omega) = \bOne{\omega \in B}$ are independent of each other).
\item[(d)] Generalise the result of the previous item to pairwise and mutual independence for collections of events and their indicator random variables.
\end{enumerate}
\begin{proof}
\begin{enumerate}
\item[(a)] To prove $X$ is independent of another random variable $Y$,
we can equivalently show that $\sigma(X)$ and $\sigma(Y)$ are independent.
And notice $\sigma(X) = \{\emptyset, \Omega\}$ for constant random element $X$.
Therefore, for all $A \in \sigma(X)$ and $B \in \sigma(Y)$ it trivially holds that $\mathbb{P}(A \cap B) = \mathbb{P}(A) \mathbb{P}(B)$.
\item[(b)] Given that $\mathbb{P}(X = x) = 1$, we can infer the generated sigma-algebra $\sigma(X) = \{\emptyset, \Omega, G_1, G_2, \cdots\}$,
where $\mathbb{P}(G_1) = \mathbb{P}(G_2) = \cdots = 0$.
Therefore, for any $A \in \sigma(X)$, we have $\mathbb{P}(A) = 0$ or $\mathbb{P}(A) = 1$.
By the result of 2.10(b), $X$ is independent of any other random variable.
\item[(c)] Notice that $\sigma(X) = \{\emptyset, \Omega, A, A^c\}$, $\sigma(Y) = \{\emptyset, \Omega, B, B^c\}$.
\begin{enumerate}
\item[(i)] `only if': Given that $A$, $B$ are independent, we have $\mathbb{P}(A \cap B) = \mathbb{P}(A) \mathbb{P}(B)$.
Consequently $\mathbb{P}(A \cap B^c) = \mathbb{P}(A) - \mathbb{P}(A \cap B) = \mathbb{P}(A) - \mathbb{P}(A) \mathbb{P}(B) = \mathbb{P}(A) \mathbb{P}(B^c)$,
which implies that $A$ and $B^c$ are also independent.
Given that $\emptyset$ and $\Omega$ are trivially independent of any other event, we have $A$ and $B$ are independent for all $A \in \sigma(X)$ and $B \in \sigma(Y)$.
\item[(ii)] `if': If $X$ and $Y$ are independent, $A \in \sigma(X)$ and $B \in \sigma(Y)$ are trivially independent.
\end{enumerate}
\item[(d)] Notice that $\sigma(X_i) = \{\emptyset, \Omega, A_i, A_i^c\}$.
\begin{enumerate}
\item[(i)] Pairwise independence: The result can be generalized as we go through all pair of events.
\item[(ii)] Mutual independence: `if' case is again trivial. For `only if' case, suppose that $(A_i)_i$ are mutually independent.
The mutual independence suggests that for any finite subset $K \subset \mathbb{N}$ we have $\mathbb{P}\left(\bigcap_{i \in K} A_{i}\right)=\prod_{i \in K} \mathbb{P}\left(A_{i}\right)$.
Similar to the previous part, for any disjoint finite sets $J, K$ we have
$$\mathbb{P}\left(\bigcap_{i \in K} A_{i} \cap \bigcap_{i \in J} A_{i}^{c}\right)=\prod_{i \in K} \mathbb{P}\left(A_{i}\right) \prod_{i \in J} \mathbb{P}\left(A_{i}^{c}\right).$$
This leads to the conclusion that for any finite set $K \subset \mathbb{N}$ and $\left(V_{i}\right)_{i \in K}$ with $V_{i} \in \sigma\left(X_{i}\right)=\left\{\Omega, \emptyset, A_{i}, A_{i}^{c}\right\}$,
we have $$\mathbb{P}\left(\bigcap_{i \in K} V_{i}\right)=\prod_{i \in K} \mathbb{P}\left(V_{i}\right),$$
which implies that $(X_i)_i$ are mutually independent.
\end{enumerate}
\end{enumerate}
\end{proof}
\noindent\textbf{2.12} Our goal in this exercise is to show that $X$ is integrable if and only if $|X|$ is integrable. This is broken down into multiple steps. The first issue is to deal with the measurability of $|X|$. While a direct calculation can also show this, it may be worthwhile to follow a more general path:
\begin{enumerate}
\item[(a)] Any $f : \RR \to \RR$ continuous function is Borel measurable.
\item[(b)] Conclude that for any random variable $X$, $|X|$ is also a random variable.
\item[(c)] Prove that for any random variable $X$, $X$ is integrable if and only if $|X|$ is integrable. (The statement makes sense since $|X|$ is a random variable whenever $X$ is).
\end{enumerate}
\begin{proof}
\begin{enumerate}
\item[(a)] Let $\cG=\set{(a,b): a<b \text{ with } a,b\in \RR}$, then $\cB(\RR) = \sigma(\cG)$. According to Exercise 2.5(b), to show that $f$ is $\cB(\RR)/\cB(\RR)$-measurable, we just need to show $f$ is $\cB(\RR)/\cG$-measurable, i.e.\ that $f^{-1}((a,b))\in \cB(\RR)$ for every $(a,b)\in\cG$. Since $f$ is continuous and $(a,b)$ is open, the preimage $f^{-1}((a,b))$ is an open subset of $\RR$. Every open subset of $\RR$ is a countable union of open intervals and hence belongs to $\cB(\RR)$. We have then shown that $f$ is $\cB(\RR)/\cB(\RR)$-measurable.
\item[(b)] By definition, $X$ is a random variable means that $X$ is $\cF/\cB(\RR)$-measurable. The map $f(x)=|x|$ is continuous, so by Exercise 2.12(a) it is $\cB(\RR)/\cB(\RR)$-measurable. Writing $|X| = f \circ X$ and applying the result of Exercise 2.1, $|X|$ is $\cF/\cB(\RR)$-measurable, thus $|X|$ is also a random variable.
\item[(c)] Define $X^{+}(\omega) = X(\omega) \bOne{X(\omega)>0}$ and $X^{-}(\omega) = -X(\omega) \bOne{X(\omega)<0}$, so that $X = X^{+} - X^{-}$ and $|X| = X^{+} + X^{-}$. By definition, $X$ is integrable if and only if both $\int_{\Omega} X^{+} d\mathbb{P}$ and $\int_{\Omega} X^{-} d\mathbb{P}$ are finite; this is exactly the condition for $|X|$ to be integrable, since $\int_{\Omega} |X| d\mathbb{P} = \int_{\Omega} X^{+} d\mathbb{P} + \int_{\Omega} X^{-} d\mathbb{P}$.
\end{enumerate}
\end{proof}
\noindent\textbf{2.13} (Infinite-valued integrals) Can we consistently extend the definition of integrals so that for non-negative random variables, the integral is always defined (it may be infinite)? Defend your view by either constructing an example (if you are arguing against) or by proving that your definition is consistent with the requirements we have for integrals.
\begin{proof}
For a non-negative random variable $X$ we have $X^{-} = 0$, so we may consistently define $\int_{\Omega} X d\mathbb{P} = \sup\{\int_{\Omega} h\, d\mathbb{P}: h \text{ is simple and } 0 \leq h \leq X\}$, allowing the value $+\infty$. More generally, the integral of $X$ is defined whenever at least one of $\int_{\Omega} X^{+} d\mathbb{P}$ and $\int_{\Omega} X^{-} d\mathbb{P}$ is finite.
\end{proof}
\noindent\textbf{2.14} Prove Proposition 2.6 (Let $(X_i)_i$ be a (possibly infinite) sequence of random variables on the same probability space and assume that $\mathbb{E}[X_i]$ exists for all $i$ and furthermore that $X = \sum_i X_i$ and $\mathbb{E}[X]$ also exists. Then $\mathbb{E}[X] = \sum_i\mathbb{E}[X_i]$)
\begin{proof}
We only consider the case of an infinite sequence; when the sum is finite, linearity of integration gives the conclusion directly.
We should add a condition first, which is
\begin{equation}
\label{condition}
\sum_i \mathbb{E}|X_i|< \infty
\end{equation}
First, we need to prove
\begin{equation}
\label{condition2}
\mathbb{E} [\sum_i |X_i|] = \sum_i\mathbb{E}[|X_i|]
\end{equation}
Consider $Y_n = \sum_{i = 1}^n|X_i|$. The sequence $\{Y_n\}_{n=1}^\infty$ is nondecreasing and each $Y_n$ is integrable. According to the monotone convergence theorem, we have
\begin{equation}
\mathbb{E}[\lim_{n\to \infty} Y_n] = \lim_{n\to \infty} \mathbb{E}[Y_n]
\end{equation}
which is equivalent to (\ref{condition2}).
Then we consider $Z_n = \sum_{i=1}^n X_i$, as $|Z_n|\leq \sum_{i=1}^n |X_i|\leq \sum_{i = 1}^\infty |X_i|$. According to (\ref{condition}), $\sum_{i = 1}^\infty |X_i|$ is integrable. Then, using dominated convergence theorem, we have
\begin{equation}
\mathbb{E}[\lim_{n\to \infty} Z_n] = \lim_{n\to \infty} \mathbb{E}[Z_n] \ \Longleftrightarrow\ \mathbb{E}[X] = \sum_i\mathbb{E}[X_i]
\end{equation}
\end{proof}
\iffalse
\begin{enumerate}
\item[(a)] Assume $\forall i, X_i$ is simple function.
\begin{align}
\mathbb{E}[X]&=\mathbb{E}\left[{\sum^{n}_{i=1} X_i}\right] \notag \\
&=\mathbb{E}\left[\sum^{n}_{i=1}\sum^{m}_{j=1}\alpha_{i,j}\mathbb{I}_{A_{i,j}}\{\omega \}\right]\notag \\
&=\int_{\Omega}\sum^{n}_{i=1}\sum^{m}_{j=1}\alpha_{i,j}\mathbb{I}_{A_{i,j}}\{\omega\} d\mathbb{P}(\omega)\notag \\
&=\sum^{n}_{i=1}\sum^{m}_{j=1}\alpha_{i,j}\int_{\Omega}\mathbb{I}_{A_{i,j}}\{\omega\} d\mathbb{P}(\omega)\notag \\
&=\sum^{n}_{i=1}\sum^{m}_{j=1}\alpha_{i,j}\mathbb{P}(A_{i,j})\notag \\
&=\sum^{n}_{i=1}\mathbb{E}[X_i]\notag
\end{align}
\item[(b)] Assume $\forall i, X_i$ is non-negative random variable.
\begin{align}
\mathbb{E}[X]&=\mathbb{E}\left[\sum^{n}_{i=1}X_i\right]\notag \\
&=\sup\left\{\int_{\Omega}hd\mathbb{P}:{h \ \text{ is simple and} }\ 0\leq h\leq X=\sum^{n}_{i=1}X_i \right\}\notag \\
&=\sum^{n}_{i=1}\sup\left\{\int_{\Omega}h_id\mathbb{P}:{h_i \ \text{ is simple and} }\ 0\leq h_i\leq X_i \right\}\notag \\
&=\sum^{n}_{i=1}\mathbb{E}[X_i]\notag
\end{align}
\item[(c)] Assume $\forall i, X_i$ is arbitrary random variable.
\begin{align}
\mathbb{E}[X]&=\mathbb{E}\left[\sum^{n}_{i=1}X_i\right]\notag\\
&=\mathbb{E}\left[\sum^{n}_{i=1}(X^+_i-X^-_i)\right]\notag\\
&=\mathbb{E}\left[\sum^n_{i=1}X^+_i\right]-\mathbb{E}\left[\sum^n_{i=1}X^-_i\right]\notag\\
&=\sum^n_{i=1}\mathbb{E}\left[X^+_i\right]-\sum^n_{i=1}\mathbb{E}\left[X^-_i\right]\notag\\
&=\sum^n_{i=1}\left(\mathbb{E}[X^+_i]-\mathbb{E}[X^-_i] \right)\notag\\
&=\sum^n_{i=1}\mathbb{E}[X_i]\notag
\end{align}
\end{enumerate}
\fi
\noindent\textbf{2.15} Prove that if $c \in \mathbb{R}$ is a constant, then $\mathbb{E}[c X]=c \mathbb{E}[X]$ (as long as $X$ is integrable).
\begin{proof}
\begin{enumerate}
\item[(a)] Firstly, assume $X$ is a simple function and $X(\omega) = \sum_{i=1}^n \alpha_i \mathbb{I}\{\omega \in A_i\}$.
Then, we have $cX(\omega) = \sum_{i=1}^n c \alpha_i \mathbb{I}\{\omega \in A_i\}$ and
\begin{equation*}
\begin{aligned}
\mathbb{E}[cX]
&= \sum_{i=1}^n c \alpha_i \mathbb{P}(A_i)\\
&= c \sum_{i=1}^n \alpha_i \mathbb{P}(A_i)\\
&= c \mathbb{E}[X].
\end{aligned}
\end{equation*}
\item[(b)] Assume $X \geq 0$ and $c \geq 0$. In this case,
\begin{equation*}
\begin{aligned}
\mathbb{E}[cX]
&= \sup \{\mathbb{E}[h]: h \text{ is simple and } h \leq cX\}\\
&= \sup \{\mathbb{E}[ch]: h \text{ is simple and } h \leq X\}\\
&= \sup \{c\mathbb{E}[h]: h \text{ is simple and } h \leq X\}\\
&= c\sup \{\mathbb{E}[h]: h \text{ is simple and } h \leq X\}\\
&= c \mathbb{E}[X].
\end{aligned}
\end{equation*}
\item[(c)] Assume $X$ is arbitrary and $c \geq 0$.
By decomposing $X = X^+ - X^-$ we can deduce that
\begin{equation*}
\begin{aligned}
\mathbb{E}[cX]
&= \mathbb{E}[cX^+] - \mathbb{E}[cX^-]\\
&= c\mathbb{E}[X^+] - c\mathbb{E}[X^-]\\
&= c\mathbb{E}[X^+ - X^-]\\
&= c\mathbb{E}[X].
\end{aligned}
\end{equation*}
\item[(d)] For $c < 0$ we may repeat the above procedures by only noticing that $(cX)^+ = -cX^-$ and $(cX)^- = -cX^+$.
\end{enumerate}
\end{proof}
\noindent\textbf{2.16} Prove Proposition 2.7.
\noindent\textsc{Hint}\quad Follow the `inductive' definition of Lebesgue integrals, starting with simple functions, then non-negative functions and finally arbitrary independent random variables.
\begin{proof}
If $X$ and $Y$ are independent, $\mathbb{P}(A \cap B) = \mathbb{P}(A) \mathbb{P}(B)$ for any $A \in \sigma(X)$ and $B \in \sigma(Y)$.
\begin{enumerate}
\item[(a)] Assume $X$ and $Y$ are both simple.
Furthermore, we can construct $X(\omega) = \sum_{i=1}^m \alpha_i \mathbb{I}\{\omega \in A_i\}$ and $Y(\omega) = \sum_{j=1}^n \beta_j \mathbb{I}\{\omega \in B_j\}$ where $(A_i)_i$ and $(B_j)_j$ are disjoint.
In such a case, we have
\begin{equation*}
\begin{aligned}
\mathbb{E}[XY]
&= \mathbb{E}[\sum_{i=1}^m \alpha_i \mathbb{I}\{A_i\} \sum_{j=1}^n \beta_j \mathbb{I}\{B_j\}]\\
&= \mathbb{E}[\sum_{i=1}^m \sum_{j=1}^n \alpha_i \beta_j \mathbb{I}\{A_i\} \mathbb{I}\{B_j\}]\\
&= \mathbb{E}[\sum_{i=1}^m \sum_{j=1}^n \alpha_i \beta_j \mathbb{I}\{A_i \cap B_j\}]\\
&= \sum_{i=1}^m \sum_{j=1}^n \alpha_i \beta_j \mathbb{P}\{A_i \cap B_j\}\\
&= \sum_{i=1}^m \sum_{j=1}^n \alpha_i \beta_j \mathbb{P}\{A_i\} \mathbb{P}\{B_j\}\\
&= \sum_{i=1}^m \alpha_i \mathbb{P}\{A_i\} \sum_{j=1}^n \beta_j \mathbb{P}\{B_j\}\\
&= \mathbb{E}[X] \mathbb{E}[Y],
\end{aligned}
\end{equation*}
where the fifth equation follows from $A_i \in \sigma(X)$ and $B_j \in \sigma(Y)$, provided that $(A_i)_i$ and $(B_j)_j$ are disjoint respectively.
\item[(b)] Assume $X$ and $Y$ are non-negative.
Then, we can construct two monotone increasing sequences of simple functions $(g_i)_i$ and $(h_i)_i$ for $X$ and $Y$ respectively, where $\lim_{i \rightarrow \infty} g_i = X$ and $\lim_{i \rightarrow \infty} h_i = Y$ and $\mathbb{E}[g_i h_i] = \mathbb{E}[g_i] \mathbb{E}[h_i]$ following the same procedure in (a).
Such sequences of simple functions can be constructed by making piecewise functions finer.
Finally, by applying the monotone convergence theorem we have
\begin{equation*}
\begin{aligned}
\mathbb{E}[XY]
&= \mathbb{E}[\lim_{i \rightarrow \infty} g_i h_i]\\
&= \lim_{i \rightarrow \infty} \mathbb{E}[g_i h_i]\\
&= \lim_{i \rightarrow \infty} \mathbb{E}[g_i] \mathbb{E}[h_i]\\
&= \mathbb{E}[\lim_{i \rightarrow \infty} g_i] \mathbb{E}[\lim_{i \rightarrow \infty} h_i]\\
&= \mathbb{E}[X] \mathbb{E}[Y].
\end{aligned}
\end{equation*}
\item[(c)] For arbitrary $X$ and $Y$, we separate them into $X = X^+ - X^-$ and $Y = Y^+ - Y^-$, deducing that
\begin{equation*}
\begin{aligned}
\mathbb{E}[XY]
&= \mathbb{E}[(X^+ - X^-)(Y^+ - Y^-)]\\
&= \mathbb{E}[X^+ Y^+] - \mathbb{E}[X^+ Y^-] - \mathbb{E}[X^- Y^+] + \mathbb{E}[X^- Y^-]\\
&= \mathbb{E}[X^+]\mathbb{E}[Y^+] - \mathbb{E}[X^+]\mathbb{E}[Y^-] - \mathbb{E}[X^-]\mathbb{E}[Y^+] + \mathbb{E}[X^-]\mathbb{E}[Y^-]\\
&= \mathbb{E}[X^+ - X^-] \mathbb{E}[Y^+ - Y^-]\\
&= \mathbb{E}[X] \mathbb{E}[Y].
\end{aligned}
\end{equation*}
\end{enumerate}
\end{proof}
\noindent\textbf{2.17} Suppose that $\cG_1 \subseteq \cG_2$ and prove that $\EE{X \mid \cG_1}=\EE{ \EE{X\mid \cG_1} \mid \cG_2 }$ almost surely.
\begin{proof}
The key step of this problem is to show that if $X$ is $\cG$-measurable, then $\EE{X\mid \cG} = X$ almost surely. Recall the definition of the conditional expectation.
\begin{definition}
Assume $(\Omega,\mathcal{F},\mathbb{P})$ is a probability space. $\mathcal{G}\subset \mathcal{F}$ is a sub-$\sigma$-algebra of $\mathcal{F}$. $X:\Omega\rightarrow \mathbb{R}$ is a random variable. The
conditional expectation of $X$ given $\mathcal{G}$ is denoted by any random variable $Y$ which satisfies the following 2 properties:
\begin{itemize}
\item $Y$ is $\mathcal{G}$-measurable
\item $\forall A\in \mathcal{G}$,
\begin{equation}
\int_A Yd\mathbb{P}=\int_AXd\mathbb{P}\notag
\end{equation}
\end{itemize}
Formally, we denote $Y$ by the notation $\mathbb{E}[X|\mathcal{G}]$.
\end{definition}
Since $X$ is $\mathcal{G}$-measurable, property 1 holds, and property 2 holds trivially. This establishes the key step; the result of this problem then follows immediately, since $\EE{X\mid \cG_1}$ is $\cG_1$-measurable and hence $\cG_2$-measurable.
\end{proof}
% % {\sc Lemma 1.}\ If $X$ is $\mathcal{G}$-measurable, then $\mathbb{E}[X|\mathcal{G}]=X$ holds a.s.
% \begin{lemma}
% If $X$ is $\mathcal{G}$-measurable, then $\mathbb{E}[X|\mathcal{G}]=X$ holds a.s.
% \end{lemma}
% \begin{proof}
% Since $X$ is $\mathcal{G}$-measurable, property1 holds. And property2 holds trivially.
% \end{proof}
% \noindent We can now handily prove Ex.2.17. Since $\mathbb{E}[X|\mathcal{G}_1]$ is $\mathcal{G}_1$-measurable and $\mathcal{G}_1\subset \mathcal{G}_2$, we can see that
% $\mathbb{E}[X|\mathcal{G}_1]$ is $\mathcal{G}_2$-measurable. By Lemma 1, $\mathbb{E}[\mathbb{E}[X|\mathcal{G}_1]|\mathcal{G}_2]=\mathbb{E}[X|\mathcal{G}_1]$ holds almost
% surely.\\
\noindent\textbf{2.18} Demonstrate using an example that in general, for dependent random variables, $\EE{XY} = \EE{X}\EE{Y}$ does not hold.
\begin{proof}
Let $Y=X$, where $X$ takes the value $1$ with probability $1/2$ and the value $-1$ with probability $1/2$. In this case, $ \EE{X} = \EE{Y} = 1\times \frac{1}{2} + (-1)\times \frac{1}{2}= 0$ but $\EE{XY} = 1\times \frac{1}{2} + 1\times \frac{1}{2}= 1$.
\end{proof}
\noindent\textbf{2.19} Prove Proposition 2.8.
\noindent \textbf{Proposition 2.8.} If $X\ge 0$ is a non-negative random variable, then
$$\EE{X} = \int_{0}^{\infty}\PP{X>x} dx\,.$$
\noindent \textit{Hint} Argue that $X(\omega) = \int_{0}^{\infty}\bOne{[0,X(\omega)]}(x)\,dx$ and exchange the integrals. Use the Fubini-Tonelli theorem to justify the exchange of integrals.
\begin{proof}
As the hint suggests, $X(\omega)=\int_{[0, \infty)} \mathbb{I}\{[0, X(\omega)]\}(x) dx$.
Hence, we have
\begin{equation}
\begin{split}
\EE{X(\omega)}
&= \EE{\int_{[0, \infty)} \mathbb{I}\{[0, X(\omega)]\}(x) dx}\\
&= \int_{[0, \infty)} \EE{\mathbb{I}\{[0, X(\omega)]\}(x)} dx\\
&= \int_{[0, \infty)} P(X(\omega) > x) dx
\end{split}
\end{equation}
where the second equality is given by the Fubini--Tonelli theorem.
\end{proof}
\noindent\textbf{2.20} Prove Theorem \ref{thm2.12}
\begin{theorem}
\label{thm2.12}
Let $(\Omega,\mathcal{F},\mathbb{P})$ be a probability space, $\mathcal{G},\mathcal{G}_1,\mathcal{G}_2 \subset \mathcal{F}$ be sub-$\sigma$-algebras of $\mathcal{F}$ and $X,Y$ integrable random variables on $(\Omega,\mathcal{F},\mathbb{P})$. The following hold true:
\begin{enumerate}
\item[1] If $X \geq 0$, then $E[X|\mathcal{G}] \geq 0$ almost surely.
\item[2] $E[1|\mathcal{G}]=1$ almost surely.
\item[3] $E[X+Y|\mathcal{G}] = E[X|\mathcal{G}]+E[Y|\mathcal{G}]$ almost surely.
\item[4] $E[XY|\mathcal{G}] = YE[X|\mathcal{G}]$ almost surely if $E[XY]$ exists and $Y$ is $\mathcal{G}$-measurable.
\item[5] If $\mathcal{G}_1\subset\mathcal{G}_2$, then $E[X|\mathcal{G}_1] = E[E[X|\mathcal{G}_2]|\mathcal{G}_1]$
\item[6] If $\sigma(X)$ is independent of $\mathcal{G}_2$ given $\mathcal{G}_1$, then $E[X|\sigma(\mathcal{G}_1\bigcup \mathcal{G}_2)] = E[X|\mathcal{G}_1]$ almost surely.
\item[7] If $\mathcal{G} = \{\emptyset,\Omega\}$ is the trivial $\sigma$-algebra, then $E[X|\mathcal{G}] = E[X]$ almost surely.
\end{enumerate}
\end{theorem}
\begin{proof}
\begin{enumerate}
\item[1] Suppose, for contradiction, that $P(E[X|\mathcal{G}] \geq 0) < 1$, and choose $H = \{\omega\,|\,E[X|\mathcal{G}](\omega) < 0\}\in \mathcal{G}$, so that $P(H) > 0$. Based on the definition of conditional expectation,
\begin{equation}
0\leq \int_H X dP = \int_H E[X|\mathcal{G}] dP <0
\end{equation}
which is a contradiction.
\item[2] For all $H \in \mathcal{G}$,
\begin{equation}
P(H)=\int_H 1 dP = \int_H E[1|\mathcal{G}]dP
\end{equation}
which means $E[1|\mathcal{G}] = 1$ a.s..
\item[3] Based on the linearity, for all $H \in \mathcal{G}$
\begin{equation}
\begin{aligned}
\int_H E[X+Y|\mathcal{G}] dP &= \int_H X+Y dP\\
& = \int_H X dP + \int_H Y dP\\
& = \int_H E[X|\mathcal{G}] dP + \int_H E[Y|\mathcal{G}] dP\\
& = \int_H E[X|\mathcal{G}] + E[Y|\mathcal{G}] dP
\end{aligned}
\end{equation}
\item[4] When $Y$ is a simple function, $Y = \sum_{i=1}^n \alpha_i I\{A_i\}$($A_i \in \mathcal{G}$ since $Y$ is $\mathcal{G}$-measurable), for all $H \in \mathcal{G}$,
\begin{equation}
\begin{aligned}
\int_H E[XY|\mathcal{G}] dP &= \int_H XY dP \\
& = \int_H \sum_{i=1}^n \alpha_i I\{A_i\} X dP \\
& = \sum_{i=1}^n \alpha_i \int_{H\bigcap A_i} X dP\\
& = \sum_{i=1}^n \alpha_i \int_{H\bigcap A_i} E[X|\mathcal{G}] dP\\
& = \int_H YE[X|\mathcal{G}] dP
\end{aligned}
\end{equation}
When $Y$ is a non-negative random variable, $Y = \sup S = \sup\{h\geq 0: h\text{ is simple and } h\leq Y\}$, let $X = X^+ - X^-$, where $X^+ = \max\{0,X\}$. For all $H \in \mathcal{G}$,
\begin{equation}
\begin{aligned}
\int_H E[XY|\mathcal{G}] dP &= \int_H (X^+-X^-)Y dP \\
& = \int_H X^+Y dP - \int_H X^-Y dP\\
& = \sup_{h\in S} \int_H h X^+ dP -\sup_{h\in S} \int_H h X^- dP\ \text{(monotone convergence theorem)}\\
& = \int_H YE[X|\mathcal{G}] dP
\end{aligned}
\end{equation}
When $Y$ is a general random variable, write $Y = Y^+ - Y^-$; using the linearity of integration, we can prove the statement.
\item[5] For any $H \in \mathcal{G}_1\subset \mathcal{G}_2$,
\begin{equation}
\int_H E[X|\mathcal{G}_1]dP = \int_H X dP = \int_H E[X|\mathcal{G}_2] dP = \int_H E[E[X|\mathcal{G}_2]|\mathcal{G}_1] dP
\end{equation}
\item[6] For any $H \in \mathcal{G}_1$ and $G \in \mathcal{G}_2$, the conditional independence of $\sigma(X)$ and $\mathcal{G}_2$ given $\mathcal{G}_1$ yields
\begin{equation}
\int_{H\cap G} X\, dP = \int_{H\cap G} E[X|\mathcal{G}_1]\, dP.
\end{equation}
The collection $\{H\cap G: H \in \mathcal{G}_1, G \in \mathcal{G}_2\}$ is a $\pi$-system generating $\sigma(\mathcal{G}_1\bigcup \mathcal{G}_2)$, so by Dynkin's $\pi$--$\lambda$ theorem the equality extends to every set in $\sigma(\mathcal{G}_1\bigcup \mathcal{G}_2)$. Since $E[X|\mathcal{G}_1]$ is $\mathcal{G}_1$-measurable and hence $\sigma(\mathcal{G}_1\bigcup \mathcal{G}_2)$-measurable, it is a version of $E[X|\sigma(\mathcal{G}_1\bigcup \mathcal{G}_2)]$, i.e., $E[X|\sigma(\mathcal{G}_1\bigcup \mathcal{G}_2)] = E[X|\mathcal{G}_1]$ almost surely.
\item[7] $E[X]$ is a constant, which is measurable on $\mathcal{G}$. We only need to prove that
\begin{equation}
\begin{cases}
\int_\emptyset E[X|\mathcal{G}] dP = \int_\emptyset E[X] dP = 0\\
\int_\Omega E[X|\mathcal{G}] dP = \int_\Omega E[X] dP = E[X]
\end{cases}
\end{equation}
\end{enumerate}
\end{proof}
\iffalse
We prove the following properties all by contradiction (for the sake of rigor).
\begin{enumerate}
\item[(1)] Let $G = \{\omega: \mathbb{E}[X \mid \mathcal{G}](\omega) < 0 \}$.
Then $G \in \mathcal{G}$ since $\mathbb{E}[X \mid \mathcal{G}]$ is $\mathcal{G}$-measurable by definition.
Now suppose $\mathbb{P}(G) > 0$, then
\begin{equation}
\begin{split}
\int_{G} X d \mathbb{P}
&= \int_{G} \mathbb{E}(X \mid \mathcal{G}) d \mathbb{P} \\
&< 0
\end{split}
\end{equation}
where the equality holds by the definition of conditional expectation.
Now we can find it contradictory as $X \geq 0$.
Therefore $\mathbb{P}(G) = 0$, and $\mathbb{E}[X \mid \mathcal{G}] \geq 0$ a.s.
\item[(2)] Let $G = \{\omega: \mathbb{E}[1 \mid \mathcal{G}](\omega) \neq 1 \}$.
Then $G \in \mathcal{G}$ since $\mathbb{E}[1 \mid \mathcal{G}]$ is $\mathcal{G}$-measurable by definition.
Now suppose $\mathbb{P}(G) > 0$, then
\begin{equation}
\begin{split}
\int_{G} 1 d \mathbb{P}
&= \int_{G} \mathbb{E}(1 \mid \mathcal{G}) d \mathbb{P} \\
&\neq 1
\end{split}
\end{equation}
where the equality holds by the definition of conditional expectation.
Now we can find it contradictory as $\int_{G} 1 d \mathbb{P} = 1$.
Therefore $\mathbb{P}(G) = 0$, and $\mathbb{E}[1 \mid \mathcal{G}] = 1$ a.s.
\item[(3)] Let $G = \{\omega: \mathbb{E}[X + Y \mid \mathcal{G}](\omega) \neq \mathbb{E}[X \mid \mathcal{G}](\omega) + \mathbb{E}[Y \mid \mathcal{G}](\omega) \}$.
Then $G \in \mathcal{G}$ since $\mathbb{E}[X + Y \mid \mathcal{G}]$, $\mathbb{E}[X \mid \mathcal{G}]$, and $\mathbb{E}[Y \mid \mathcal{G}]$ are all $\mathcal{G}$-measurable by definition.
Now suppose $\mathbb{P}(G) > 0$, then
\begin{equation}
\begin{split}
\int_{G} (X + Y) d \mathbb{P}
&= \int_{G} \mathbb{E}(X + Y \mid \mathcal{G}) d \mathbb{P} \\
&\neq \int_{G} [\mathbb{E}(X \mid \mathcal{G}) + \mathbb{E}(Y \mid \mathcal{G})] d \mathbb{P} \\
&= \int_{G} \mathbb{E}(X \mid \mathcal{G}) d \mathbb{P} + \int_{G} \mathbb{E}(Y \mid \mathcal{G}) d \mathbb{P} \\
&= \int_{G} X d \mathbb{P} + \int_{G} Y d \mathbb{P}
\end{split}
\end{equation}
where the first equality and the last one hold by the definition of conditional expectation.
It contradicts the linearity of expectation in that $\int_{G} (X + Y) d \mathbb{P} \neq \int_{G} X d \mathbb{P} + \int_{G} Y d \mathbb{P}$.
Therefore $\mathbb{P}(G) = 0$, and $\mathbb{E}(X + Y \mid \mathcal{G}) = \mathbb{E}(X \mid \mathcal{G}) + \mathbb{E}(Y \mid \mathcal{G})$ a.s.
\item[(4)] Let $G = \{\omega: \mathbb{E}[X Y \mid \mathcal{G}](\omega) \neq Y(\omega) \mathbb{E}[X \mid \mathcal{G}](\omega) \}$.
Then $G \in \mathcal{G}$ since $\mathbb{E}[XY \mid \mathcal{G}]$, $Y$, and $\mathbb{E}[X \mid \mathcal{G}]$ are all $\mathcal{G}$-measurable by definition.
Now suppose $\mathbb{P}(G) > 0$, then
\begin{equation} \label{2.20_contradiction}
\begin{split}
\int_{G} X Y d \mathbb{P}
&= \int_{G} \mathbb{E}(X Y \mid \mathcal{G}) d \mathbb{P} \\
&\neq \int_{G} Y \mathbb{E}[X \mid \mathcal{G}] d \mathbb{P}
\end{split}
\end{equation}
Now our target is to show it is contradictory.
This is a bit tricky, so we start from the simplest case and then generalize it step by step.
\begin{enumerate}
\item[a.] Suppose $Y = \mathbb{I}_A$ for some $A \in \mathcal{G}$.
Then
\begin{equation}
\int_{G} X Y d \mathbb{P} = \int_{G \cap A} X d \mathbb{P}
\end{equation}
and
\begin{equation}
\begin{split}
\int_{G} Y \mathbb{E}[X \mid \mathcal{G}] d \mathbb{P}
&= \int_{G \cap A} \mathbb{E}[X \mid \mathcal{G}] d \mathbb{P}\\
&= \int_{G \cap A} X d \mathbb{P}
\end{split}
\end{equation}
Hence it holds that $\int_{G} X Y d \mathbb{P} = \int_{G} Y \mathbb{E}[X \mid \mathcal{G}] d \mathbb{P}$.
\item[b.] Suppose $Y$ is non-negative and let $\{Y_n\}$ be sequence of non-negative simple functions converging to $Y$ from below.
Then by linearity, it holds that
\begin{equation}
\int_{G} X^+ Y_n d \mathbb{P} = \int_{G} Y_n \mathbb{E}[X^+ \mid \mathcal{G}] d \mathbb{P}
\end{equation}
and
\begin{equation}
\int_{G} X^- Y_n d \mathbb{P} = \int_{G} Y_n \mathbb{E}[X^- \mid \mathcal{G}] d \mathbb{P}
\end{equation}
Applying the monotone convergence we end up with
\begin{equation}
\int_{G} X^+ Y d \mathbb{P} = \int_{G} Y \mathbb{E}[X^+ \mid \mathcal{G}] d \mathbb{P}
\end{equation}
and
\begin{equation}
\int_{G} X^- Y d \mathbb{P} = \int_{G} Y \mathbb{E}[X^- \mid \mathcal{G}] d \mathbb{P}
\end{equation}
Hence,
\begin{equation}
\begin{split}
\int_{G} X Y d \mathbb{P}
&= \int_{G} X^+ Y d \mathbb{P} - \int_{G} X^- Y d \mathbb{P} \\
&= \int_{G} Y (\mathbb{E}[X^+ \mid \mathcal{G}] - \mathbb{E}[X^- \mid \mathcal{G}]) d \mathbb{P} \\
&= \int_{G} Y \mathbb{E}[X^+ - X^- \mid \mathcal{G}] d \mathbb{P} \\
&= \int_{G} Y \mathbb{E}[X \mid \mathcal{G}] d \mathbb{P}
\end{split}
\end{equation}
\item[c.] Finally, for arbitrary $Y$, we can separate $Y = Y^+ - Y^-$ and the contradiction still holds by linearity of expectation.
\end{enumerate}
Therefore, in any case Eq.\ref{2.20_contradiction} is contradictory.
So $\mathbb{P}(G) = 0$, and $\mathbb{E}[X Y \mid \mathcal{G}] = Y \mathbb{E}[X \mid \mathcal{G}]$ a.s.
\item[(5)] Let $G = \{\omega: \mathbb{E}[X \mid \mathcal{G}_1](\omega) \neq \mathbb{E}[\mathbb{E}[X \mid \mathcal{G}_2] \mid \mathcal{G}_1](\omega) \}$.
Then $G \in \mathcal{G}_1$ since both $\mathbb{E}[X \mid \mathcal{G}_1]$ and $\mathbb{E}[\mathbb{E}[X \mid \mathcal{G}_2] \mid \mathcal{G}_1]$ are $\mathcal{G}_1$-measurable by definition.
Now suppose $\mathbb{P}(G) > 0$, then
\begin{equation}
\begin{split}
\int_{G} X d \mathbb{P}
&= \int_{G} \mathbb{E}(X \mid \mathcal{G}_1) d \mathbb{P} \\
&\neq \int_{G} \mathbb{E}[\mathbb{E}[X \mid \mathcal{G}_2] \mid \mathcal{G}_1] d \mathbb{P} \\
&= \int_{G} \mathbb{E}(X \mid \mathcal{G}_2) d \mathbb{P} \\
&= \int_{G} X d \mathbb{P}
\end{split}
\end{equation}
The last equality stands since $G \in \mathcal{G}_1$ and $\mathcal{G}_1 \subset \mathcal{G}_2$, which suggests $G \in \mathcal{G}_2$.
Now we can find it contradictory. Therefore $\mathbb{P}(G) = 0$, and $\mathbb{E}[X \mid \mathcal{G}_1] = \mathbb{E}[\mathbb{E}[X \mid \mathcal{G}_2] \mid \mathcal{G}_1]$ a.s.
\begin{equation}
\begin{split}
\int_{G} X d \mathbb{P}
&= \int_{G} \mathbb{E}(X \mid \mathcal{G}_1) d \mathbb{P} \\
&\neq \int_{G} \mathbb{E}[\mathbb{E}[X \mid \mathcal{G}_2] \mid \mathcal{G}_1] d \mathbb{P} \\
&= \int_{G} \mathbb{E}(X \mid \mathcal{G}_2) d \mathbb{P} \\
&= \int_{G} X d \mathbb{P}
\end{split}
\end{equation}
\item[(6)] Let $G = \{\omega: \mathbb{E}\left[X \mid \sigma\left(\mathcal{G}_{1} \cup \mathcal{G}_{2}\right)\right](\omega) \neq \mathbb{E}\left[X \mid \mathcal{G}_{1}\right](\omega) \}$.
Notice that $\mathbb{E}\left[X \mid \mathcal{G}_{1}\right]$ is not only $\mathcal{G}_1$-measurable but also $\sigma\left(\mathcal{G}_{1} \cup \mathcal{G}_{2}\right)$-measurable.
Thus we have $G \in \sigma\left(\mathcal{G}_{1} \cup \mathcal{G}_{2}\right)$.
Now suppose $\mathbb{P}(G) > 0$, then
\begin{equation}
\begin{split}
\int_{G} X d \mathbb{P}
&= \int_{G} \mathbb{E}\left[X \mid \sigma\left(\mathcal{G}_{1} \cup \mathcal{G}_{2}\right)\right] d \mathbb{P} \\
&\neq \int_{G} \mathbb{E}\left[X \mid \mathcal{G}_{1}\right] d \mathbb{P}
\end{split}
\end{equation}
To show it is contradictory, we want to prove that $\forall G \in \sigma\left(\mathcal{G}_{1} \cup \mathcal{G}_{2}\right)$,
\begin{equation} \label{2.20_hardone}
\int_{G} X d \mathbb{P} = \int_{G} \mathbb{E}\left[X \mid \mathcal{G}_{1}\right] d \mathbb{P}
\end{equation}
The following techniques are closely related to `Dynkin system`, which is beyond my knowledge.
The main idea is that if we assume $X$ is non-negative, which can be generalized by linearity, it is enough to establish Eq.\ref{2.20_hardone} for some $\pi$-system that generates $\sigma\left(\mathcal{G}_{1} \cup \mathcal{G}_{2}\right)$.
One possibility is $\mathcal{H}=\{G_1 \cap G_2: G_1 \in \mathcal{G}_1, G_2 \in \mathcal{G}_2\}$.
Then, $\forall G_1 \cap G_2 \in \mathcal{H}$,
\begin{equation}
\begin{split}
\int_{G_1 \cap G_2} \mathbb{E}\left[X \mid \mathcal{G}_{1}\right] d \mathbb{P}
&= \int_{\Omega} \mathbb{E}\left[X \mid \mathcal{G}_{1}\right] \mathbb{I}_{G_1} \mathbb{I}_{G_2} d \mathbb{P} \\
&= \int_{\Omega} \mathbb{E}\left[X \mid \mathcal{G}_{1}\right] \mathbb{I}_{G_1} d \mathbb{P} \int_{\Omega} \mathbb{I}_{G_2} d \mathbb{P} \\
&= \int_{\Omega} X \mathbb{I}_{G_1} d \mathbb{P}
\int_{\Omega} \mathbb{I}_{G_2} d \mathbb{P} \\
&= \int_{\Omega} X \mathbb{I}_{G_1} \mathbb{I}_{G_2} d \mathbb{P} \\
&= \int_{G_1 \cap G_2} X d \mathbb{P}
\end{split}
\end{equation}
where the second and fourth equality holds due to independence between $\sigma(X)$ and $\mathcal{G}_2$ given $\mathcal{G}_1$.
Hence, we find it contradictory. So $\mathbb{P}(G) = 0$ and $\mathbb{E}\left[X \mid \sigma\left(\mathcal{G}_{1} \cup \mathcal{G}_{2}\right)\right] = \mathbb{E}\left[X \mid \mathcal{G}_{1}\right]$ a.s.
\item[(7)] Let $G = \{\omega: \mathbb{E}[X \mid \mathcal{G}] (\omega) \neq \mathbb{E}[X] \}$.
Then $G \in \mathcal{G}$ since $\mathbb{E}[X \mid \mathcal{G}]$ is $\mathcal{G}$-measurable by definition.
And because $\mathcal{G}$ is trivial, $G = \emptyset$ or $G = \Omega$.
\begin{enumerate}
\item[a.] If $G = \emptyset$, $P(G) = 0$ for sure.
\item[b.] If $G = \Omega$, which suggests $\mathbb{E}[X \mid \mathcal{G}] \neq \mathbb{E}[X]$ always holds, we have
\begin{equation}
\begin{split}
\int_{G} X d \mathbb{P}
&= \int_{G} \mathbb{E}[X \mid \mathcal{G}] d \mathbb{P} \\
&\neq \int_{G} \mathbb{E}[X] d \mathbb{P} \\
&= \int_{\Omega} \mathbb{E}[X] d \mathbb{P} \\
&= \mathbb{E}[X]
\end{split}
\end{equation}
which is obviously contradictory since $\int_{G} X d \mathbb{P} = \int_{\Omega} X d \mathbb{P} = \mathbb{E}[X]$.
\end{enumerate}
Therefore, $P(G) = 0$ and hence $\mathbb{E}[X \mid \mathcal{G}] = \mathbb{E}[X]$ a.s.
\end{enumerate}
\fi