Skip to content

Commit 144befb

Browse files
dangotbannedSerge-Étienne Parentmattijn
authored
docs(example): Adds Confidence Interval Ellipses (#3747)
* Create deviation_ellipses.py example showing bivariate deviation ellipses of petal length and width of three iris species * docs: Initial rewrite of (#514) Happy with the end result, but not comfortable merging so much complexity I don't understand yet #3715 * ci(typing): Adds `scipy-stubs` to `altair[doc]` `scipy` is only used for one example in the user guide, but this will be the second https://docs.scipy.org/doc/scipy/release/1.15.0-notes.html#other-changes * fix: Only install `scipy-stubs` on `>=3.10` * chore(typing): Ignore incorrect `pandas` stubs * ci(typing): ignore `scipy` on `3.9` https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-library-stubs-or-py-typed-marker https://github.com/vega/altair/actions/runs/12612565960/job/35149436953?pr=3747 * docs: Add missing category * fix: Add missing support for `from __future__ import annotations` Fixes https://github.com/vega/altair/actions/runs/12612637008/job/35149593128?pr=3747#step:6:25 * test: skip example when `scipy` not installed Temporary fix for https://github.com/vega/altair/actions/runs/12612997919/job/35150338097?pr=3747 * docs: reduce segments `100` -> `50` Observed no visible reduction in quality. Slightly visible at `<=40` * docs: Clean up `numpy`, `scipy` docs/comments * refactor: Simplify `numpy` transforms * docs: add tooltip, increase size * fix: Remove incorrect range stop Previously returned `segments+1` rows, but this isn't specified in `ggplot2` https://github.com/tidyverse/ggplot2/blob/efc53cc000e7d86e3db22e1f43089d366fe24f2e/R/stat-ellipse.R#L122 * refactor: Remove special casing `__future__` import I forgot that the only requirement was that the import is the **first statement**. 
Partially reverts (7cd2a77) * docs: Remove unused `method` code Also resolves #3747 (comment) * docs: rename to 'Confidence Interval Ellipses' * docs: add references to description * docs: Adds methods syntax version Includes comment removal suggestion in (#3747 (comment)) * refactor: Rewrite `pd_ellipse` - Fixed a type ignore (caused by incomplete stubs) - Renamed variables - Replace the implicit `"index"` column by naming it `"order"` #3747 (comment) * ci(uv): sync `scipy-stubs` dc7639d a296b82 * refactor(typing): Try removing `from __future__ import annotations` #3747 (comment), #3747 (comment) * refactor: rename `np_ellipse` -> `confidence_region_2d` #3747 (comment) * refactor: rename `pd_ellipse` -> `grouped_confidence_regions` #3747 (comment) * docs: change category to `"case studies"` #3747 (comment) * styling --------- Co-authored-by: Serge-Étienne Parent <[email protected]> Co-authored-by: Mattijn van Hoek <[email protected]>
1 parent 21f5849 commit 144befb

File tree

6 files changed

+213
-3
lines changed

6 files changed

+213
-3
lines changed

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ doc = [
9999
"sphinx_copybutton",
100100
"sphinx-design",
101101
"scipy",
102+
"scipy-stubs; python_version>=\"3.10\"",
102103
]
103104

104105
[tool.altair.vega]
@@ -314,8 +315,10 @@ module = [
314315
"ipykernel.*",
315316
"ibis.*",
316317
"vegafusion.*",
318+
"scipy.*"
317319
]
318320
ignore_missing_imports = true
321+
disable_error_code = ["import-untyped"]
319322

320323
[tool.pyright]
321324
enableExperimentalFeatures=true

sphinxext/utils.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ def create_generic_image(
5858
"""
5959

6060

61-
def _parse_source_file(filename: str) -> tuple[ast.Module | None, str]:
61+
def _parse_source_file(filename: str | Path) -> tuple[ast.Module | None, str]:
6262
"""
6363
Parse source file into AST node.
6464
@@ -88,7 +88,7 @@ def _parse_source_file(filename: str) -> tuple[ast.Module | None, str]:
8888
return node, content
8989

9090

91-
def get_docstring_and_rest(filename: str) -> tuple[str, str | None, str, int]:
91+
def get_docstring_and_rest(filename: str | Path) -> tuple[str, str | None, str, int]:
9292
"""
9393
Separate ``filename`` content between docstring and the rest.
9494
@@ -160,7 +160,7 @@ def get_docstring_and_rest(filename: str) -> tuple[str, str | None, str, int]:
160160
if (
161161
node.body
162162
and isinstance(node.body[0], ast.Expr)
163-
and isinstance(node.body[0].value, (ast.Str, ast.Constant))
163+
and isinstance(node.body[0].value, ast.Constant)
164164
):
165165
docstring_node = node.body[0]
166166
docstring = docstring_node.value.s # pyright: ignore[reportAttributeAccessIssue]

tests/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,10 @@ def windows_has_tzdata() -> bool:
9696
https://github.com/vega/vegafusion
9797
"""
9898

99+
skip_requires_scipy: pytest.MarkDecorator = pytest.mark.skipif(
100+
find_spec("scipy") is None, reason="`scipy` not installed."
101+
)
102+
99103

100104
@overload
101105
def skip_requires_pyarrow(
@@ -236,6 +240,7 @@ def _distributed_examples(
236240
"wind_vector_map": slow,
237241
r"\.point_map\.py": slow,
238242
"line_chart_with_color_datum": slow,
243+
"deviation_ellipses": skip_requires_scipy,
239244
},
240245
)
241246
),
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
"""
2+
Confidence Interval Ellipses
3+
----------------------------
4+
This example shows bivariate deviation ellipses of petal length and width of three iris species.
5+
6+
Inspired by `ggplot2.stat_ellipse`_ and directly based on `Deviation ellipses example`_ by `@essicolo`_
7+
8+
.. _ggplot2.stat_ellipse:
9+
https://ggplot2.tidyverse.org/reference/stat_ellipse.html#ref-examples
10+
.. _Deviation ellipses example:
11+
https://github.com/vega/altair/pull/514
12+
.. _@essicolo:
13+
https://github.com/essicolo
14+
"""
15+
16+
# category: case studies
17+
import numpy as np
18+
import pandas as pd
19+
from scipy.stats import f as F
20+
21+
import altair as alt
22+
from vega_datasets import data
23+
24+
25+
def confidence_region_2d(arr, conf_level=0.95, segments=50):
26+
"""
27+
Calculate confidence interval ellipse.
28+
29+
Parameters
30+
----------
31+
arr
32+
numpy array with 2 columns
33+
conf_level
34+
lower tail probability
35+
segments
36+
number of points describing the ellipse.
37+
"""
38+
n_elements = len(arr)
39+
# Degrees of freedom of the chi-squared distribution in the **numerator**
40+
dfn = 2
41+
# Degrees of freedom of the chi-squared distribution in the **denominator**
42+
dfd = n_elements - 1
43+
# Percent point function at `conf_level` of an F continuous random variable
44+
quantile = F.ppf(conf_level, dfn=dfn, dfd=dfd)
45+
radius = np.sqrt(2 * quantile)
46+
angles = np.arange(0, segments) * 2 * np.pi / segments
47+
circle = np.column_stack((np.cos(angles), np.sin(angles)))
48+
center = np.mean(arr, axis=0)
49+
cov_mat = np.cov(arr, rowvar=False)
50+
return center + radius * (circle @ np.linalg.cholesky(cov_mat).T)
51+
52+
53+
def grouped_confidence_regions(df, col_x, col_y, col_group):
54+
cols = [col_x, col_y]
55+
ellipses = []
56+
ser: pd.Series[float] = df[col_group]
57+
for group in ser.drop_duplicates():
58+
arr = df.loc[ser == group, cols].to_numpy()
59+
ellipse = pd.DataFrame(confidence_region_2d(arr), columns=cols)
60+
ellipse[col_group] = group
61+
ellipses.append(ellipse)
62+
return pd.concat(ellipses).reset_index(names="order")
63+
64+
65+
col_x = "petalLength"
66+
col_y = "petalWidth"
67+
col_group = "species"
68+
69+
x = alt.X(col_x, scale=alt.Scale(zero=False))
70+
y = alt.Y(col_y, scale=alt.Scale(zero=False))
71+
color = alt.Color(col_group)
72+
73+
source = data.iris()
74+
ellipse = grouped_confidence_regions(source, col_x=col_x, col_y=col_y, col_group=col_group)
75+
points = alt.Chart(source).mark_circle(size=50, tooltip=True).encode(
76+
x=x,
77+
y=y,
78+
color=color
79+
)
80+
lines = alt.Chart(ellipse).mark_line(filled=True, fillOpacity=0.2).encode(
81+
x=x,
82+
y=y,
83+
color=color,
84+
order="order"
85+
)
86+
87+
chart = (lines + points).properties(height=500, width=500)
88+
chart
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
"""
2+
Confidence Interval Ellipses
3+
----------------------------
4+
This example shows bivariate deviation ellipses of petal length and width of three iris species.
5+
6+
Inspired by `ggplot2.stat_ellipse`_ and directly based on `Deviation ellipses example`_ by `@essicolo`_
7+
8+
.. _ggplot2.stat_ellipse:
9+
https://ggplot2.tidyverse.org/reference/stat_ellipse.html#ref-examples
10+
.. _Deviation ellipses example:
11+
https://github.com/vega/altair/pull/514
12+
.. _@essicolo:
13+
https://github.com/essicolo
14+
"""
15+
16+
# category: case studies
17+
import numpy as np
18+
import pandas as pd
19+
from scipy.stats import f as F
20+
21+
import altair as alt
22+
from vega_datasets import data
23+
24+
25+
def confidence_region_2d(arr, conf_level=0.95, segments=50):
26+
"""
27+
Calculate confidence interval ellipse.
28+
29+
Parameters
30+
----------
31+
arr
32+
numpy array with 2 columns
33+
conf_level
34+
lower tail probability
35+
segments
36+
number of points describing the ellipse.
37+
"""
38+
n_elements = len(arr)
39+
# Degrees of freedom of the chi-squared distribution in the **numerator**
40+
dfn = 2
41+
# Degrees of freedom of the chi-squared distribution in the **denominator**
42+
dfd = n_elements - 1
43+
# Percent point function at `conf_level` of an F continuous random variable
44+
quantile = F.ppf(conf_level, dfn=dfn, dfd=dfd)
45+
radius = np.sqrt(2 * quantile)
46+
angles = np.arange(0, segments) * 2 * np.pi / segments
47+
circle = np.column_stack((np.cos(angles), np.sin(angles)))
48+
center = np.mean(arr, axis=0)
49+
cov_mat = np.cov(arr, rowvar=False)
50+
return center + radius * (circle @ np.linalg.cholesky(cov_mat).T)
51+
52+
53+
def grouped_confidence_regions(df, col_x, col_y, col_group):
54+
cols = [col_x, col_y]
55+
ellipses = []
56+
ser: pd.Series[float] = df[col_group]
57+
for group in ser.drop_duplicates():
58+
arr = df.loc[ser == group, cols].to_numpy()
59+
ellipse = pd.DataFrame(confidence_region_2d(arr), columns=cols)
60+
ellipse[col_group] = group
61+
ellipses.append(ellipse)
62+
return pd.concat(ellipses).reset_index(names="order")
63+
64+
65+
col_x = "petalLength"
66+
col_y = "petalWidth"
67+
col_group = "species"
68+
69+
x = alt.X(col_x).scale(zero=False)
70+
y = alt.Y(col_y).scale(zero=False)
71+
color = alt.Color(col_group)
72+
73+
source = data.iris()
74+
ellipse = grouped_confidence_regions(source, col_x=col_x, col_y=col_y, col_group=col_group)
75+
points = alt.Chart(source).mark_circle(size=50, tooltip=True).encode(
76+
x=x,
77+
y=y,
78+
color=color
79+
)
80+
lines = alt.Chart(ellipse).mark_line(filled=True, fillOpacity=0.2).encode(
81+
x=x,
82+
y=y,
83+
color=color,
84+
order="order"
85+
)
86+
87+
chart = (lines + points).properties(height=500, width=500)
88+
chart

uv.lock

Lines changed: 26 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)