# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "marimo",
#     "matplotlib==3.10.0",
#     "numpy==2.2.3",
# ]
# ///

import marimo

__generated_with = "0.11.8"
app = marimo.App(width="medium", app_title="Bayes Theorem")


@app.cell
def _():
    import marimo as mo
    return (mo,)


@app.cell
def _():
    import matplotlib.pyplot as plt
    import numpy as np
    return np, plt


@app.cell(hide_code=True)
def _(mo):
    mo.md(
        r"""
        # Bayes' Theorem

        _This notebook is a computational companion to the book ["Probability for Computer Scientists"](https://chrispiech.github.io/probabilityForComputerScientists/en/part1/bayes_theorem/), by Stanford professor Chris Piech._

        In the 1740s, an English minister named Thomas Bayes discovered a profound mathematical relationship that would revolutionize how we reason about uncertainty. His theorem provides an elegant framework for calculating the probability of a hypothesis being true given observed evidence.

        At its core, Bayes' Theorem connects two different types of probabilities: the probability of a hypothesis given evidence $P(H|E)$, and its reverse - the probability of evidence given a hypothesis $P(E|H)$. This relationship is particularly powerful because it allows us to compute difficult probabilities using ones that are easier to measure.
        """
    )
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md(
        r"""
        ## The Heart of Bayesian Reasoning

        The fundamental insight of Bayes' Theorem lies in its ability to relate what we want to know with what we can measure. When we observe evidence $E$, we often want to know the probability of a hypothesis $H$ being true. However, it's typically much easier to measure how likely we are to observe the evidence when we know the hypothesis is true.

        This reversal of perspective - from $P(H|E)$ to $P(E|H)$ - is powerful because it lets us:

        1. Start with what we know (prior beliefs)
        2. Use easily measurable relationships (likelihood)
        3. Update our beliefs with new evidence

        This approach mirrors both how humans naturally learn and the scientific method: we begin with prior beliefs, gather evidence, and update our understanding based on that evidence. This makes Bayes' Theorem not just a mathematical tool, but a framework for rational thinking.
        """
    )
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md(
        r"""
        ## The Formula

        Bayes' Theorem states:

        $P(H|E) = \frac{P(E|H)P(H)}{P(E)}$

        Where:

        - $P(H|E)$ is the **posterior probability** - probability of hypothesis H given evidence E
        - $P(E|H)$ is the **likelihood** - probability of evidence E given hypothesis H
        - $P(H)$ is the **prior probability** - initial probability of hypothesis H
        - $P(E)$ is the **evidence** - total probability of observing evidence E

        The denominator $P(E)$ can be expanded using the [Law of Total Probability](https://marimo.app/gh/marimo-team/learn/main?entrypoint=probability%2F07_law_of_total_probability.py):

        $P(E) = P(E|H)P(H) + P(E|H^c)P(H^c)$
        """
    )
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md(
        r"""
        ## Understanding Each Component

        ### 1. Prior Probability - $P(H)$
        - Initial belief about hypothesis before seeing evidence
        - Based on previous knowledge or assumptions
        - Example: Probability of having a disease before any tests

        ### 2. Likelihood - $P(E|H)$
        - Probability of evidence given hypothesis is true
        - Often known from data or scientific studies
        - Example: Probability of positive test given disease present

        ### 3. Evidence - $P(E)$
        - Total probability of observing the evidence
        - Acts as a normalizing constant
        - Can be calculated using Law of Total Probability

        ### 4. Posterior - $P(H|E)$
        - Updated probability after considering evidence
        - Combines prior knowledge with new evidence
        - Becomes new prior for future updates
        """
    )
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md(
        r"""
        ## Real-World Examples

        ### 1. Medical Testing
        - **Want to know**: $P(\text{Disease}|\text{Positive})$ - Probability of disease given positive test
        - **Easy to know**: $P(\text{Positive}|\text{Disease})$ - Test accuracy for sick people
        - **Causality**: Disease causes test results, not vice versa

        ### 2. Student Ability
        - **Want to know**: $P(\text{High Ability}|\text{Good Grade})$ - Probability student is skilled given good grade
        - **Easy to know**: $P(\text{Good Grade}|\text{High Ability})$ - Probability good students get good grades
        - **Causality**: Ability influences grades, not vice versa

        ### 3. Cell Phone Location
        - **Want to know**: $P(\text{Location}|\text{Signal Strength})$ - Probability of phone location given signal
        - **Easy to know**: $P(\text{Signal Strength}|\text{Location})$ - Signal strength at known locations
        - **Causality**: Location determines signal strength, not vice versa

        These examples highlight a common pattern: what we want to know (posterior) is harder to measure directly than its reverse (likelihood).
        """
    )
    return


@app.cell
def _():
    def calculate_posterior(prior, likelihood, false_positive_rate):
        """Apply Bayes' Theorem to a binary hypothesis H and evidence E.

        Args:
            prior: P(H), probability of the hypothesis before seeing evidence.
            likelihood: P(E|H), probability of the evidence when H is true.
            false_positive_rate: P(E|not H), probability of the evidence
                when H is false.

        Returns:
            A tuple ``(posterior, p_e)`` holding P(H|E) and P(E).

        Raises:
            ZeroDivisionError: If P(E) is 0, in which case the posterior
                is undefined because the evidence can never be observed.
        """
        # P(E and H): numerator of Bayes' Theorem, also the first term of P(E)
        joint = likelihood * prior

        # P(E) via the Law of Total Probability
        p_e = joint + false_positive_rate * (1 - prior)
        if p_e == 0:
            raise ZeroDivisionError(
                "P(E) is 0: the posterior P(H|E) is undefined when the "
                "evidence can never occur"
            )

        posterior = joint / p_e
        return posterior, p_e
    return (calculate_posterior,)


@app.cell
def _(calculate_posterior):
    # Medical test example
    p_disease = 0.01  # Prior: 1% have the disease
    p_positive_given_disease = 0.95  # Likelihood: 95% test accuracy
    p_positive_given_healthy = 0.10  # False positive rate: 10%

    medical_posterior, medical_evidence = calculate_posterior(
        p_disease,
        p_positive_given_disease,
        p_positive_given_healthy
    )
    return (
        medical_evidence,
        medical_posterior,
        p_disease,
        p_positive_given_disease,
        p_positive_given_healthy,
    )


@app.cell
def _(medical_explanation):
    medical_explanation
    return


@app.cell(hide_code=True)
def _(medical_posterior, mo):
    # NOTE: this is a non-raw f-string, so LaTeX commands need a doubled
    # backslash and literal braces need to be doubled.
    medical_explanation = mo.md(f"""
    ### Medical Testing Example

    Consider a medical test for a rare disease:

    - Prior: 1% of population has the disease
    - Likelihood: 95% test accuracy for sick people
    - False positive: 10% of healthy people test positive

    Using Bayes' Theorem:
    $P(D|+) = \\frac{{0.95 \\times 0.01}}{{0.95 \\times 0.01 + 0.10 \\times 0.99}} = {medical_posterior:.3f}$

    Despite a positive test, there's only a {medical_posterior:.1%} chance of having the disease!
    This counterintuitive result occurs because the disease is rare (low prior probability).
    """)
    return (medical_explanation,)


@app.cell
def _(calculate_posterior):
    # Student ability example
    p_high_ability = 0.30  # Prior: 30% of students have high ability
    p_good_grade_given_high = 0.90  # Likelihood: 90% of high ability students get good grades
    p_good_grade_given_low = 0.40  # 40% of lower ability students also get good grades

    student_posterior, student_evidence = calculate_posterior(
        p_high_ability,
        p_good_grade_given_high,
        p_good_grade_given_low
    )
    return (
        p_good_grade_given_high,
        p_good_grade_given_low,
        p_high_ability,
        student_evidence,
        student_posterior,
    )


@app.cell
def _(student_explanation):
    student_explanation
    return


@app.cell(hide_code=True)
def _(mo, student_posterior):
    student_explanation = mo.md(f"""
    ### Student Ability Example

    If a student gets a good grade, what's the probability they have high ability?

    Using Bayes' Theorem:

    - Prior: 30% have high ability
    - Likelihood: 90% of high ability students get good grades
    - False positive: 40% of lower ability students get good grades

    Result: P(High Ability|Good Grade) = {student_posterior:.2f}

    So a good grade increases our confidence in high ability from 30% to {student_posterior:.1%}
    """)
    return (student_explanation,)


@app.cell
def _(calculate_posterior):
    # Cell phone location example
    p_location_a = 0.25  # Prior probability of being in location A
    p_strong_signal_at_a = 0.85  # Likelihood of strong signal at A
    p_strong_signal_elsewhere = 0.15  # False positive rate

    location_posterior, location_evidence = calculate_posterior(
        p_location_a,
        p_strong_signal_at_a,
        p_strong_signal_elsewhere
    )
    return (
        location_evidence,
        location_posterior,
        p_location_a,
        p_strong_signal_at_a,
        p_strong_signal_elsewhere,
    )


@app.cell
def _(location_explanation):
    location_explanation
    return


@app.cell(hide_code=True)
def _(location_posterior, mo):
    location_explanation = mo.md(f"""
    ### Cell Phone Location Example

    Given a strong signal, what's the probability the phone is in location A?

    Using Bayes' Theorem:

    - Prior: 25% chance of being in location A
    - Likelihood: 85% chance of strong signal at A
    - False positive: 15% chance of strong signal elsewhere

    Result: P(Location A|Strong Signal) = {location_posterior:.2f}

    The strong signal increases our confidence in location A from 25% to {location_posterior:.1%}
    """)
    return (location_explanation,)


@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""## Interactive example""")
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md(
        r"""

        _This interactive example was made with [marimo](https://github.com/marimo-team/marimo/blob/main/examples/misc/bayes_theorem.py), and is [based on an explanation of Bayes' Theorem by Grant Sanderson](https://www.youtube.com/watch?v=HZGCoVF3YvM&list=PLzq7odmtfKQw2KIbQq0rzWrqgifHKkPG1&index=1&t=3s)_.

        Bayes theorem provides a convenient way to calculate the probability
        of a hypothesis event $H$ given evidence $E$:

        \[
        P(H \mid E) = \frac{P(H) P(E \mid H)}{P(E)}.
        \]


        **The numerator.** The numerator is the probability of events $E$ and $H$ happening
        together; that is,

        \[
        P(H) P(E \mid H) = P(E \cap H).
        \]

        **The denominator.**
        In most calculations, it is helpful to rewrite the denominator $P(E)$ as

        \[
        P(E) = P(H)P(E \mid H) + P(\neg H) P (E \mid \neg H),
        \]

        which in turn can also be written as


        \[
        P(E) = P(E \cap H) + P(E \cap \neg H).
        \]
        """
    ).left()
    return


@app.cell(hide_code=True)
def _(
    bayes_result,
    construct_probability_plot,
    mo,
    p_e,
    p_e_given_h,
    p_e_given_not_h,
    p_h,
):
    mo.hstack(
        [
            mo.md(
                rf"""
                ### Probability parameters

                You can configure the probabilities of the events $H$, $E \mid H$, and $E \mid \neg H$

                {mo.as_html([p_h, p_e_given_h, p_e_given_not_h])}

                The plot on the right visualizes the probabilities of these events.

                1. The yellow rectangle represents the event $H$, and its area is $P(H) = {p_h.value:0.2f}$.
                2. The teal rectangle overlapping with the yellow one represents the event $E \cap H$, and
                   its area is $P(H) \cdot P(E \mid H) = {p_h.value * p_e_given_h.value:0.2f}$.
                3. The teal rectangle that doesn't overlap the yellow rectangle represents the event $E \cap \neg H$, and
                   its area is $P(\neg H) \cdot P(E \mid \neg H) = {(1 - p_h.value) * p_e_given_not_h.value:0.2f}$.

                Notice that the sum of the areas in $2$ and $3$ is the probability $P(E) = {p_e:0.2f}$.

                One way to think about Bayes' Theorem is the following: the probability $P(H \mid E)$ is the probability
                of $E$ and $H$ happening together (the area of the rectangle $2$), divided by the probability of $E$ happening
                at all (the sum of the areas of $2$ and $3$).
                In this case, Bayes' Theorem says

                \[
                P(H \mid E) = \frac{{P(H) P(E \mid H)}}{{P(E)}} = \frac{{{p_h.value:0.2f} \cdot {p_e_given_h.value:0.2f}}}{{{p_e:0.2f}}} = {bayes_result:0.2f}
                \]
                """
            ),
            construct_probability_plot(),
        ],
        justify="start",
        gap=4,
        align="start",
        widths=[0.33, 0.5],
    )
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md(
        r"""
        ## Applications in Computer Science

        Bayes' Theorem is fundamental in many computing applications:

        1. **Spam Filtering**

            - $P(\text{Spam}|\text{Words})$ = Probability email is spam given its words
            - Updates as new emails are classified

        2. **Machine Learning**

            - Naive Bayes classifiers
            - Probabilistic graphical models
            - Bayesian neural networks

        3. **Computer Vision**

            - Object detection confidence
            - Face recognition systems
            - Image classification
        """
    )
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md(
        """
        ## 🤔 Test Your Understanding

        Pick which of these statements about Bayes' Theorem you think are correct:

        <details>
        <summary>The posterior probability will always be larger than the prior probability</summary>
        ❌ Incorrect! Evidence can either increase or decrease our belief in the hypothesis. For example, a negative medical test decreases the probability of having a disease.
        </details>

        <details>
        <summary>If the likelihood is 0.9 and the prior is 0.5, then the posterior must equal 0.9</summary>
        ❌ Incorrect! We also need the false positive rate to calculate the posterior probability. The likelihood alone doesn't determine the posterior.
        </details>

        <details>
        <summary>The denominator acts as a normalizing constant to ensure the posterior is a valid probability</summary>
        ✅ Correct! The denominator ensures the posterior probability is between 0 and 1 by considering all ways the evidence could occur.
        </details>
        """
    )
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md(
        """
        ## Summary

        You've learned:

        - The components and intuition behind Bayes' Theorem
        - How to update probabilities when new evidence arrives
        - Why posterior probabilities can be counterintuitive
        - Real-world applications in computer science

        In the next lesson, we'll explore Random Variables, which help us work with numerical outcomes in probability.
        """
    )
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md(
        r"""
        ### Appendix
        Below (hidden) cell blocks are responsible for the interactive example above
        """
    )
    return


@app.cell(hide_code=True)
def _(p_e_given_h, p_e_given_not_h, p_h):
    # P(E and H): the numerator of Bayes' Theorem
    _joint = p_h.value * p_e_given_h.value
    # P(E) via the Law of Total Probability
    p_e = _joint + (1 - p_h.value) * p_e_given_not_h.value
    # The sliders can drive P(E) to 0 (e.g. both conditionals at 0); the
    # posterior is undefined there, so show NaN instead of crashing the cell.
    bayes_result = _joint / p_e if p_e > 0 else float("nan")
    return bayes_result, p_e


@app.cell(hide_code=True)
def _(mo):
    # Labels are raw strings: "\m" and "\n" must reach the LaTeX renderer
    # verbatim rather than being read as Python escape sequences.
    p_h = mo.ui.slider(0.0, 1, label=r"$P(H)$", value=0.1, step=0.1)
    p_e_given_h = mo.ui.slider(0.0, 1, label=r"$P(E \mid H)$", value=0.3, step=0.1)
    p_e_given_not_h = mo.ui.slider(
        0.0, 1, label=r"$P(E \mid \neg H)$", value=0.3, step=0.1
    )
    return p_e_given_h, p_e_given_not_h, p_h


@app.cell(hide_code=True)
def _(p_e_given_h, p_e_given_not_h, p_h, plt):
    def construct_probability_plot():
        """Draw the unit-square picture of H, E-and-H, and E-and-not-H.

        Reads the current values of the ``p_h``, ``p_e_given_h`` and
        ``p_e_given_not_h`` sliders and returns the matplotlib Axes that
        holds the rectangles.
        """
        # One explicit figure per call; avoids leaving a stray empty figure
        # behind every time the sliders change.
        _, ax = plt.subplots(figsize=(6, 6))

        # Grey unit square: the whole sample space
        base = plt.Rectangle((0, 0), 1, 1, fc="black", ec="white", alpha=0.25)
        # Yellow column of width P(H)
        h = plt.Rectangle((0, 0), p_h.value, 1, fc="yellow", ec="white", label="H")
        # Teal block inside H with height P(E|H): area P(E and H)
        e_given_h = plt.Rectangle(
            (0, 0),
            p_h.value,
            p_e_given_h.value,
            fc="teal",
            ec="white",
            alpha=0.5,
            label="E",
        )
        # Teal block outside H with height P(E|not H): area P(E and not H)
        e_given_not_h = plt.Rectangle(
            (p_h.value, 0),
            1 - p_h.value,
            p_e_given_not_h.value,
            fc="teal",
            ec="white",
            alpha=0.5,
        )

        # Same drawing order as before so the overlap colours are unchanged
        for patch in (base, h, e_given_not_h, e_given_h):
            ax.add_patch(patch)

        # add_patch does not rescale the view; pin it to the unit square
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)
        ax.legend()
        return ax
    return (construct_probability_plot,)


if __name__ == "__main__":
    app.run()