Skip to content

Commit 0b08ba3

Browse files
authored
Merge pull request #33 from marimo-team/haleshot/04_conditional_probability
Add `conditional probability` notebook
2 parents 8e910e2 + a50dbac commit 0b08ba3

File tree

1 file changed

+369
-0
lines changed

1 file changed

+369
-0
lines changed
Lines changed: 369 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,369 @@
1+
# /// script
2+
# requires-python = ">=3.10"
3+
# dependencies = [
4+
# "marimo",
5+
# "matplotlib==3.10.0",
6+
# "matplotlib-venn==1.1.1",
7+
# "numpy==2.2.2",
8+
# ]
9+
# ///
10+
11+
import marimo
12+
13+
__generated_with = "0.11.4"
14+
app = marimo.App(width="medium", app_title="Conditional Probability")
15+
16+
17+
@app.cell
18+
def _():
19+
import marimo as mo
20+
return (mo,)
21+
22+
23+
@app.cell(hide_code=True)
24+
def _(mo):
25+
mo.md(
26+
r"""
27+
# Conditional Probability
28+
29+
_This notebook is a computational companion to the book ["Probability for Computer Scientists"](https://chrispiech.github.io/probabilityForComputerScientists/en/part1/cond_prob/), by Stanford professor Chris Piech._
30+
31+
In probability theory, we often want to update our beliefs when we receive new information.
32+
Conditional probability helps us formalize this process by calculating "_what is the chance of
33+
event $E$ happening given that we have already observed some other event $F$?_"[<sup>1</sup>](https://chrispiech.github.io/probabilityForComputerScientists/en/part1/cond_prob/)
34+
35+
When we condition on an event $F$:
36+
37+
- We enter the universe where $F$ has occurred
38+
- Only outcomes consistent with $F$ are possible
39+
- Our sample space reduces to $F$
40+
"""
41+
)
42+
return
43+
44+
45+
@app.cell(hide_code=True)
46+
def _(mo):
47+
mo.md(
48+
r"""
49+
## Definition of Conditional Probability
50+
51+
The probability of event $E$ given that event $F$ has occurred is denoted as $P(E \mid F)$ and is defined as:
52+
53+
$$P(E \mid F) = \frac{P(E \cap F)}{P(F)}$$
54+
55+
This formula tells us that the conditional probability is the probability of both events occurring
56+
divided by the probability of the conditioning event.
57+
58+
Let's start with a visual example.
59+
"""
60+
)
61+
return
62+
63+
64+
@app.cell
65+
def _():
66+
import matplotlib.pyplot as plt
67+
from matplotlib_venn import venn3
68+
import numpy as np
69+
return np, plt, venn3
70+
71+
72+
@app.cell(hide_code=True)
def _(mo, plt, venn3):
    # Draws a Venn diagram of events E and F inside a dashed box standing for
    # the whole sample space, and shows it above a worked explanation.
    # The figure is deliberately wide (10 x 3), not square.
    plt.figure(figsize=(10, 3))

    # Draw the sample-space boundary first
    rect = plt.Rectangle((-2, -2), 4, 4, fill=False, color="gray", linestyle="--")
    plt.gca().add_patch(rect)

    # Set the axis limits to show the full rectangle
    plt.xlim(-2.5, 2.5)
    plt.ylim(-2.5, 2.5)

    # Create Venn diagram showing E and F
    # For venn3, subsets order is: (100, 010, 110, 001, 101, 011, 111)
    # Representing: (A, B, AB, C, AC, BC, ABC)
    v = venn3(subsets=(30, 20, 10, 40, 0, 0, 0), set_labels=("E", "F", "Rest"))

    # Region id -> fill colour. Underscore-prefixed names stay local to this
    # cell, so nothing here shadows the builtin `id` or leaks into the
    # notebook's globals.
    _region_colors = {
        "100": "#ffcccc",  # Light red for E
        "010": "#ccffcc",  # Light green for F
        "110": "#e6ffe6",  # Lighter green for intersection
        "001": "white",  # White for rest
    }
    if v:
        for _region_id, _color in _region_colors.items():
            # A region may have no patch; look it up once and skip if absent.
            _patch = v.get_patch_by_id(_region_id)
            if _patch:
                _patch.set_color(_color)

    plt.title("Conditional Probability in Sample Space")

    # Remove ticks but keep the box visible
    plt.gca().set_yticks([])
    plt.gca().set_xticks([])
    plt.axis("on")

    # Add sample space annotation with arrow
    plt.annotate(
        "Sample Space (100)",
        xy=(-1.5, 1.5),
        xytext=(-2.2, 2),
        bbox=dict(boxstyle="round,pad=0.5", fc="white", ec="gray"),
        arrowprops=dict(arrowstyle="->"),
    )

    # Add explanation
    explanation = mo.md(r"""
    ### Visual Intuition

    In our sample space of 100 outcomes:

    - Event $E$ occurs in 40 cases (red region: 30 + 10)
    - Event $F$ occurs in 30 cases (green region: 20 + 10)
    - Both events occur together in 10 cases (overlap)
    - Remaining cases: 40 (to complete sample space of 100)

    When we condition on $F$:
    $$P(E \mid F) = \frac{P(E \cap F)}{P(F)} = \frac{10}{30} = \frac{1}{3} \approx 0.33$$

    This means: When we know $F$ has occurred (restricting ourselves to the green region),
    the probability of $E$ also occurring is $\frac{1}{3}$ - as 10 out of the 30 cases in the
    green region also belong to the red region.
    """)

    mo.vstack([mo.center(plt.gcf()), explanation])
    return explanation, rect, v
142+
143+
144+
@app.cell(hide_code=True)
145+
def _(mo):
146+
mo.md(
147+
r"Next, here's a function that computes $P(E \mid F)$, given $P( E \cap F)$ and $P(F)$"
148+
)
149+
return
150+
151+
152+
@app.cell
153+
def _():
154+
def conditional_probability(p_intersection, p_condition):
    """Return P(E | F) = P(E ∩ F) / P(F).

    Args:
        p_intersection: P(E ∩ F), the probability that both events occur.
        p_condition: P(F), the probability of the conditioning event.

    Returns:
        The conditional probability P(E | F).

    Raises:
        ValueError: If either argument is not a probability in [0, 1],
            if P(F) is zero, or if P(E ∩ F) exceeds P(F).
    """
    # Written as `not (0 <= p <= 1)` so that NaN is rejected too: NaN
    # compares false against everything and would slip past `p < 0 or p > 1`.
    if not (0 <= p_intersection <= 1) or not (0 <= p_condition <= 1):
        raise ValueError("Probabilities must be between 0 and 1")
    if p_condition == 0:
        raise ValueError("Cannot condition on an impossible event")
    if p_intersection > p_condition:
        raise ValueError("P(E∩F) cannot be greater than P(F)")

    return p_intersection / p_condition
161+
return (conditional_probability,)
162+
163+
164+
@app.cell
165+
def _(conditional_probability):
166+
# Example 1: Rolling a die
167+
# E: Rolling an even number (2,4,6)
168+
# F: Rolling a number greater than 3 (4,5,6)
169+
p_even_given_greater_than_3 = conditional_probability(2 / 6, 3 / 6)
170+
print("Example 1: Rolling a die")
171+
print(f"P(Even | >3) = {p_even_given_greater_than_3}") # Should be 2/3
172+
return (p_even_given_greater_than_3,)
173+
174+
175+
@app.cell
176+
def _(conditional_probability):
177+
# Example 2: Cards
178+
# E: Drawing a Heart
179+
# F: Drawing a Face card (J,Q,K)
180+
p_heart_given_face = conditional_probability(3 / 52, 12 / 52)
181+
print("\nExample 2: Drawing cards")
182+
print(f"P(Heart | Face card) = {p_heart_given_face}") # Should be 1/4
183+
return (p_heart_given_face,)
184+
185+
186+
@app.cell
187+
def _(conditional_probability):
188+
# Example 3: Student grades
189+
# E: Getting an A
190+
# F: Studying more than 3 hours
191+
p_a_given_study = conditional_probability(0.24, 0.40)
192+
print("\nExample 3: Student grades")
193+
print(f"P(A | Studied >3hrs) = {p_a_given_study}") # Should be 0.6
194+
return (p_a_given_study,)
195+
196+
197+
@app.cell
198+
def _(conditional_probability):
199+
# Example 4: Weather
200+
# E: Raining
201+
# F: Cloudy
202+
p_rain_given_cloudy = conditional_probability(0.15, 0.30)
203+
print("\nExample 4: Weather")
204+
print(f"P(Rain | Cloudy) = {p_rain_given_cloudy}") # Should be 0.5
205+
return (p_rain_given_cloudy,)
206+
207+
208+
@app.cell
209+
def _(conditional_probability):
210+
# Example 5: Error cases
211+
print("\nExample 5: Error cases")
212+
try:
213+
# Cannot condition on impossible event
214+
conditional_probability(0.5, 0)
215+
except ValueError as e:
216+
print(f"Error 1: {e}")
217+
218+
try:
219+
# Intersection cannot be larger than condition
220+
conditional_probability(0.7, 0.5)
221+
except ValueError as e:
222+
print(f"Error 2: {e}")
223+
return
224+
225+
226+
@app.cell(hide_code=True)
227+
def _(mo):
228+
mo.md(
229+
r"""
230+
## The Conditional Paradigm
231+
232+
When we condition on an event, we enter a new probability universe. In this universe:
233+
234+
1. All probability axioms still hold
235+
2. We must consistently condition on the same event
236+
3. Our sample space becomes the conditioning event
237+
238+
Here's how our familiar probability rules look when conditioned on event $G$:
239+
240+
| Rule | Original | Conditioned on $G$ |
241+
|------|----------|-------------------|
242+
| Axiom 1 | $0 \leq P(E) \leq 1$ | $0 \leq P(E \mid G) \leq 1$ |
243+
| Axiom 2 | $P(S) = 1$ | $P(S \mid G) = 1$ |
244+
| Axiom 3* | $P(E \cup F) = P(E) + P(F)$ | $P(E \cup F \mid G) = P(E \mid G) + P(F \mid G)$ |
245+
| Complement | $P(E^C) = 1 - P(E)$ | $P(E^C \mid G) = 1 - P(E \mid G)$ |
246+
247+
*_For mutually exclusive events_
248+
"""
249+
)
250+
return
251+
252+
253+
@app.cell(hide_code=True)
254+
def _(mo):
255+
mo.md(
256+
r"""
257+
## Multiple Conditions
258+
259+
We can condition on multiple events. The notation $P(E \mid F,G)$ means "_the probability of $E$
260+
occurring, given that both $F$ and $G$ have occurred._"
261+
262+
The conditional probability formula still holds in the universe where $G$ has occurred:
263+
264+
$$P(E \mid F,G) = \frac{P(E \cap F \mid G)}{P(F \mid G)}$$
265+
266+
This is a powerful extension that allows us to update our probabilities as we receive
267+
multiple pieces of information.
268+
"""
269+
)
270+
return
271+
272+
273+
@app.cell
274+
def _():
275+
def multiple_conditional_probability(
    p_intersection_all, p_intersection_conditions, p_condition
):
    """Calculate P(E|F,G) = P(E∩F|G)/P(F|G) = P(E∩F∩G)/P(F∩G).

    Args:
        p_intersection_all: P(E∩F∩G), probability that all three events occur.
        p_intersection_conditions: P(F∩G), probability that both
            conditioning events occur.
        p_condition: Probability of a single conditioning event (P(G) in
            the formula above). It cancels out of the ratio, so it is used
            only to validate the inputs.

    Returns:
        The conditional probability P(E|F,G).

    Raises:
        ValueError: If any argument is not a probability in [0, 1], if a
            conditioning probability is zero, or if the inputs are
            inconsistent (an intersection larger than the event containing
            it).
    """
    probabilities = (p_intersection_all, p_intersection_conditions, p_condition)
    # `not (0 <= p <= 1)` also rejects NaN, which compares false to everything.
    if any(not (0 <= p <= 1) for p in probabilities):
        raise ValueError("Probabilities must be between 0 and 1")
    if p_condition == 0:
        raise ValueError("Cannot condition on an impossible event")
    if p_intersection_conditions == 0:
        raise ValueError(
            "Cannot condition on an impossible combination of events"
        )
    # An intersection can never be more likely than either event it is
    # built from, so F∩G is bounded by the single condition.
    if p_intersection_conditions > p_condition:
        raise ValueError("P(F∩G) cannot be greater than P(G)")
    if p_intersection_all > p_intersection_conditions:
        raise ValueError("P(E∩F∩G) cannot be greater than P(F∩G)")

    return p_intersection_all / p_intersection_conditions
289+
return (multiple_conditional_probability,)
290+
291+
292+
@app.cell
293+
def _(multiple_conditional_probability):
294+
# Example: College admissions
295+
# E: Getting admitted
296+
# F: High GPA
297+
# G: Good test scores
298+
299+
# P(E∩F∩G) = P(Admitted ∩ HighGPA ∩ GoodScore) = 0.15
300+
# P(F∩G) = P(HighGPA ∩ GoodScore) = 0.25
301+
302+
p_admit_given_both = multiple_conditional_probability(0.15, 0.25, 0.25)
303+
print("College Admissions Example:")
304+
print(
305+
f"P(Admitted | High GPA, Good Scores) = {p_admit_given_both}"
306+
) # Should be 0.6
307+
308+
# Error case: inconsistent inputs - P(E∩F∩G) = 0.3 exceeds P(F∩G) = 0.2
309+
try:
310+
multiple_conditional_probability(0.3, 0.2, 0.2)
311+
except ValueError as e:
312+
print(f"\nError case: {e}")
313+
return (p_admit_given_both,)
314+
315+
316+
@app.cell(hide_code=True)
317+
def _(mo):
318+
mo.md(
319+
r"""
320+
## 🤔 Test Your Understanding
321+
322+
Which of these statements about conditional probability are true?
323+
324+
<details>
325+
<summary>Knowing F occurred always decreases the probability of E</summary>
326+
❌ False! Conditioning on F can either increase or decrease P(E), depending on how E and F are related.
327+
</details>
328+
329+
<details>
330+
<summary>P(E|F) represents entering a new probability universe where F has occurred</summary>
331+
✅ True! We restrict ourselves to only the outcomes where F occurred, making F our new sample space.
332+
</details>
333+
334+
<details>
335+
<summary>If P(E|F) = P(E), then E and F must be the same event</summary>
336+
❌ False! This actually means E and F are independent - knowing one doesn't affect the other.
337+
</details>
338+
339+
<details>
340+
<summary>P(E|F) can be calculated by dividing P(E∩F) by P(F)</summary>
341+
✅ True! This is the fundamental definition of conditional probability.
342+
</details>
343+
"""
344+
)
345+
return
346+
347+
348+
@app.cell(hide_code=True)
def _(mo):
    # Closing summary of the notebook. The string is raw because the LaTeX
    # below contains backslashes (\mid, \cap) that are not valid Python
    # escape sequences; without the r-prefix newer Python versions warn.
    mo.md(
        r"""
        ## Summary

        You've learned:

        - How conditional probability updates our beliefs with new information
        - The formula $P(E \mid F) = P(E \cap F)/P(F)$ and its intuition
        - How probability rules work in conditional universes
        - How to handle multiple conditions

        In the next lesson, we'll explore **independence** - when knowing about one event
        tells us nothing about another.
        """
    )
    return
366+
367+
368+
if __name__ == "__main__":
369+
app.run()

0 commit comments

Comments
 (0)