|
| 1 | +# /// script |
| 2 | +# requires-python = ">=3.10" |
| 3 | +# dependencies = [ |
| 4 | +# "marimo", |
| 5 | +# "matplotlib", |
| 6 | +# "matplotlib-venn" |
| 7 | +# ] |
| 8 | +# /// |
| 9 | + |
| 10 | +import marimo |
| 11 | + |
# Version string recorded in the notebook file (presumably the marimo release
# that generated it; confirm against marimo's file-format docs).
__generated_with = "0.11.4"
# The notebook application object; every cell below registers itself on it
# through the @app.cell decorator.
app = marimo.App(width="medium")
| 14 | + |
| 15 | + |
@app.cell
def _():
    # Expose the marimo API under the alias `mo`; the markdown and UI cells
    # below receive it as a parameter.
    import marimo as mo
    return (mo,)
| 20 | + |
| 21 | + |
@app.cell
def _():
    # Plotting dependencies used by the interactive Venn-diagram cell.
    from matplotlib import pyplot as plt
    from matplotlib_venn import venn2
    return plt, venn2
| 27 | + |
| 28 | + |
@app.cell(hide_code=True)
def _(mo):
    # Title cell: names the topic and credits the companion textbook.
    _intro = r"""
        # Probability of And
        _This notebook is a computational companion to the book ["Probability for Computer Scientists"](https://chrispiech.github.io/probabilityForComputerScientists/en/part1/prob_and/), by Stanford professor Chris Piech._

        When calculating the probability of both events occurring together, we need to consider whether the events are independent or dependent.
        Let's explore how to calculate $P(E \cap F)$, i.e. $P(E \text{ and } F)$, in different scenarios.
        """
    mo.md(_intro)
    return
| 41 | + |
| 42 | + |
@app.cell(hide_code=True)
def _(mo):
    # Explains the product rule for independent events and lists examples.
    _independent_section = r"""
        ## And with Independent Events

        Two events $E$ and $F$ are **independent** if knowing one event occurred doesn't affect the probability of the other.
        For independent events:

        $P(E \text{ and } F) = P(E) \cdot P(F)$

        For example:

        - Rolling a 6 on one die and getting heads on a coin flip
        - Drawing a heart from a deck, replacing it, and drawing another heart
        - Getting a computer error on Monday vs. Tuesday

        Here's a Python function to calculate probability for independent events:
        """
    mo.md(_independent_section)
    return
| 64 | + |
| 65 | + |
@app.cell
def _():
    def calc_independent_prob(p_e, p_f):
        """Return P(E and F) for independent events: the product of P(E) and P(F)."""
        joint = p_e * p_f
        return joint

    # Example 1: Rolling a die and flipping a coin
    p_six, p_heads = 1/6, 1/2  # P(rolling a 6), P(getting heads)
    p_both = calc_independent_prob(p_six, p_heads)
    _report = f"Example 1: P(rolling 6 AND getting heads) = {p_six:.3f} × {p_heads:.3f} = {p_both:.3f}"
    print(_report)
    return calc_independent_prob, p_both, p_heads, p_six
| 77 | + |
| 78 | + |
@app.cell
def _(calc_independent_prob):
    # Example 2: Two independent system components failing
    p_cpu_fail, p_disk_fail = 0.05, 0.03  # P(CPU failure), P(disk failure)
    p_both_fail = calc_independent_prob(p_cpu_fail, p_disk_fail)
    _report = f"Example 2: P(both CPU and disk failing) = {p_cpu_fail:.3f} × {p_disk_fail:.3f} = {p_both_fail:.3f}"
    print(_report)
    return p_both_fail, p_cpu_fail, p_disk_fail
| 87 | + |
| 88 | + |
@app.cell(hide_code=True)
def _(mo):
    # Introduces the chain rule for dependent events.
    # The second bullet originally read "two consecutive heads in poker",
    # which mixes coin flips (independent) with a card game; it now names a
    # card-dealing example, where draws without replacement are dependent.
    mo.md(
        r"""
        ## And with Dependent Events

        For dependent events, we use the **chain rule**:

        $P(E \text{ and } F) = P(E) \cdot P(F|E)$

        where $P(F|E)$ is the probability of $F$ occurring given that $E$ has occurred.

        For example:

        - Drawing two hearts without replacement
        - Being dealt two consecutive aces in poker
        - System failures in connected components

        Let's implement this calculation:
        """
    )
    return
| 111 | + |
| 112 | + |
@app.cell
def _():
    def calc_dependent_prob(p_e, p_f_given_e):
        """Return P(E and F) via the chain rule: P(E) times P(F | E)."""
        joint = p_e * p_f_given_e
        return joint

    # Example 1: Drawing two hearts without replacement
    p_first_heart, p_second_heart = 13/52, 12/51  # P(first heart), P(second heart | first heart)
    p_both_hearts = calc_dependent_prob(p_first_heart, p_second_heart)
    _report = f"Example 1: P(two hearts) = {p_first_heart:.3f} × {p_second_heart:.3f} = {p_both_hearts:.3f}"
    print(_report)
    return calc_dependent_prob, p_both_hearts, p_first_heart, p_second_heart
| 124 | + |
| 125 | + |
@app.cell
def _(calc_dependent_prob):
    # Example 2: Drawing two aces without replacement
    p_first_ace, p_second_ace = 4/52, 3/51  # P(first ace), P(second ace | first ace)
    p_both_aces = calc_dependent_prob(p_first_ace, p_second_ace)
    _report = f"Example 2: P(two aces) = {p_first_ace:.3f} × {p_second_ace:.3f} = {p_both_aces:.3f}"
    print(_report)
    return p_both_aces, p_first_ace, p_second_ace
| 134 | + |
| 135 | + |
@app.cell(hide_code=True)
def _(mo):
    # Generalizes both rules from two events to n events.
    _multiple_events_section = r"""
        ## Multiple Events

        For multiple independent events:

        $P(E_1 \text{ and } E_2 \text{ and } \cdots \text{ and } E_n) = \prod_{i=1}^n P(E_i)$

        For dependent events:

        $P(E_1 \text{ and } E_2 \text{ and } \cdots \text{ and } E_n) = P(E_1) \cdot P(E_2|E_1) \cdot P(E_3|E_1,E_2) \cdots P(E_n|E_1,\ldots,E_{n-1})$

        Let's visualize these probabilities:
        """
    mo.md(_multiple_events_section)
    return
| 154 | + |
| 155 | + |
@app.cell(hide_code=True)
def _(mo):
    # Sub-heading that introduces the interactive Venn-diagram section.
    _heading = r"""### Interactive example"""
    mo.md(_heading)
    return
| 160 | + |
| 161 | + |
@app.cell
def _(event_type):
    # Show the scenario dropdown. The bare expression is the cell's last
    # statement, which marimo uses as the cell's visible output (per marimo's
    # documented cell-output convention).
    event_type
    return
| 166 | + |
| 167 | + |
@app.cell(hide_code=True)
def _(mo):
    # Scenario names offered to the reader. The same strings are used as the
    # keys of `events_data` in the visualization cell; the first is preselected.
    _scenarios = [
        "Independent AND (Die and Coin)",
        "Dependent AND (Sequential Cards)",
        "Multiple AND (System Components)",
    ]
    event_type = mo.ui.dropdown(
        options=_scenarios,
        value=_scenarios[0],
        label="Select AND Probability Scenario",
    )
    return (event_type,)
| 180 | + |
| 181 | + |
@app.cell(hide_code=True)
def _(event_type, mo, plt, venn2):
    def _venn_regions(p_a, p_b, p_a_and_b):
        """Turn P(A), P(B) and P(A and B) into the region sizes venn2 expects.

        venn2's `subsets` tuple holds the sizes of the three disjoint regions
        (A only, B only, A and B), not the marginal probabilities, so the
        overlap is subtracted from each marginal. Values are rounded to four
        decimals because venn2 prints the region sizes as labels.
        """
        return (
            round(p_a - p_a_and_b, 4),
            round(p_b - p_a_and_b, 4),
            round(p_a_and_b, 4),
        )

    # Scenario catalogue, keyed by the dropdown's option strings. Each entry
    # carries the Venn region sizes, the circle labels, the plot title and a
    # markdown explanation shown next to the figure.
    events_data = {
        "Independent AND (Die and Coin)": {
            # P(6) = 1/6, P(H) = 1/2, P(6 and H) = 1/12
            "sets": _venn_regions(1 / 6, 1 / 2, 1 / 12),
            "labels": ("Die\nP(6)=1/6", "Coin\nP(H)=1/2"),
            "title": "Independent Events: Rolling a 6 AND Getting Heads",
            "explanation": r"""
            ### Independent Events: Die Roll and Coin Flip

            $P(\text{Rolling 6}) = \frac{1}{6} \approx 0.17$

            $P(\text{Getting Heads}) = \frac{1}{2} = 0.5$

            $P(\text{6 and Heads}) = \frac{1}{6} \times \frac{1}{2} = \frac{1}{12} \approx 0.08$

            These events are independent because the outcome of the die roll
            doesn't affect the coin flip, and vice versa.
            """,
        },
        "Dependent AND (Sequential Cards)": {
            # Circle A: first card is a heart (13/52). Circle B: second card is
            # a heart, whose marginal probability is also 13/52. The overlap is
            # the chain-rule product 13/52 * 12/51.
            "sets": _venn_regions(13 / 52, 13 / 52, (13 / 52) * (12 / 51)),
            "labels": ("First\nP(H₁)=13/52", "Second\nP(H₂|H₁)=12/51"),
            "title": "Dependent Events: Drawing Two Hearts",
            "explanation": r"""
            ### Dependent Events: Drawing Hearts

            $P(\text{First Heart}) = \frac{13}{52} = 0.25$

            $P(\text{Second Heart}|\text{First Heart}) = \frac{12}{51} \approx 0.24$

            $P(\text{Both Hearts}) = \frac{13}{52} \times \frac{12}{51} \approx 0.06$

            These events are dependent because drawing the first heart
            changes the probability of drawing the second heart.
            """,
        },
        "Multiple AND (System Components)": {
            # P(CPU fail) = 0.05, P(disk fail) = 0.03, independent failures
            "sets": _venn_regions(0.05, 0.03, 0.05 * 0.03),
            "labels": ("CPU\nP(C)=0.05", "Disk\nP(D)=0.03"),
            "title": "Independent System Failures",
            "explanation": r"""
            ### System Component Failures

            $P(\text{CPU Failure}) = 0.05$

            $P(\text{Disk Failure}) = 0.03$

            $P(\text{Both Fail}) = 0.05 \times 0.03 = 0.0015$

            Component failures are typically independent in **well-designed systems**,
            meaning one component's failure doesn't affect the other's probability of failing.
            """,
        },
    }

    # Get data for selected event type
    data = events_data[event_type.value]

    # Create visualization
    plt.figure(figsize=(10, 5))
    v = venn2(subsets=data["sets"], set_labels=data["labels"])
    plt.title(data["title"])

    # Display explanation alongside visualization; this must stay the last
    # expression so that it becomes the cell's output.
    mo.hstack([plt.gcf(), mo.md(data["explanation"])])
    return data, events_data, v
| 254 | + |
| 255 | + |
@app.cell(hide_code=True)
def _(mo):
    # Self-check quiz: each <details> element hides the answer until expanded.
    _quiz = r"""
        ## 🤔 Test Your Understanding

        Which of these statements about AND probability are true?

        <details>
        <summary>1. The probability of getting two sixes in a row with a fair die is 1/36</summary>

        ✅ True! Since die rolls are independent events:
        P(two sixes) = P(first six) × P(second six) = 1/6 × 1/6 = 1/36
        </details>

        <details>
        <summary>2. When drawing cards without replacement, P(two kings) = 4/52 × 4/52</summary>

        ❌ False! This is a dependent event. The correct calculation is:
        P(two kings) = P(first king) × P(second king | first king) = 4/52 × 3/51
        </details>

        <details>
        <summary>3. If P(A) = 0.3 and P(B) = 0.4, then P(A and B) must be 0.12</summary>

        ❌ False! P(A and B) = 0.12 only if A and B are independent events.
        If they're dependent, we need P(B|A) to calculate P(A and B).
        </details>

        <details>
        <summary>4. The probability of rolling a six AND getting tails is (1/6 × 1/2)</summary>

        ✅ True! These are independent events, so we multiply their individual probabilities:
        P(six and tails) = P(six) × P(tails) = 1/6 × 1/2 = 1/12
        </details>
        """
    mo.md(_quiz)
    return
| 294 | + |
| 295 | + |
@app.cell(hide_code=True)
def _(mo):
    # Closing recap of the notebook. The final sentence originally lacked the
    # article before "law of total probability"; only that wording is changed.
    mo.md(
        """
        ## Summary

        You've learned:

        - How to identify independent vs dependent events
        - The multiplication rule for independent events
        - The chain rule for dependent events
        - How to extend these concepts to multiple events

        In the next lesson, we'll explore the **law of total probability** in more detail, building on our understanding of various topics.
        """
    )
    return
| 313 | + |
| 314 | + |
# Run the notebook application when this file is executed directly as a script.
if __name__ == "__main__":
    app.run()
0 commit comments