Skip to content

Commit 2da4d32

Browse files
committed
dist
1 parent 2d8afa7 commit 2da4d32

File tree

17 files changed

+26793
-70
lines changed

17 files changed

+26793
-70
lines changed

docs/api/geoms.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,10 @@ Geometric objects (geoms) are the visual elements used to represent data in a pl
5050
options:
5151
show_root_heading: true
5252

53+
::: ggplotly.geoms.geom_norm.geom_norm
54+
options:
55+
show_root_heading: true
56+
5357
## Area Geoms
5458

5559
::: ggplotly.geoms.geom_area.geom_area
@@ -70,6 +74,14 @@ Geometric objects (geoms) are the visual elements used to represent data in a pl
7074
options:
7175
show_root_heading: true
7276

77+
::: ggplotly.geoms.geom_qq.geom_qq
78+
options:
79+
show_root_heading: true
80+
81+
::: ggplotly.geoms.geom_qq_line.geom_qq_line
82+
options:
83+
show_root_heading: true
84+
7385
## Text and Annotation
7486

7587
::: ggplotly.geoms.geom_text.geom_text

docs/api/stats.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,3 +55,21 @@ Statistical transformations for data.
5555
::: ggplotly.stats.stat_fanchart.stat_fanchart
5656
options:
5757
show_root_heading: true
58+
59+
## stat_function
60+
61+
::: ggplotly.stats.stat_function.stat_function
62+
options:
63+
show_root_heading: true
64+
65+
## stat_qq
66+
67+
::: ggplotly.stats.stat_qq.stat_qq
68+
options:
69+
show_root_heading: true
70+
71+
## stat_qq_line
72+
73+
::: ggplotly.stats.stat_qq_line.stat_qq_line
74+
options:
75+
show_root_heading: true

docs/gallery/statistical.ipynb

Lines changed: 25550 additions & 67 deletions
Large diffs are not rendered by default.

ggplotly/__init__.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,14 @@
2424
geom_line,
2525
geom_lines,
2626
geom_map,
27+
geom_norm,
2728
geom_ohlc,
2829
geom_pacf,
2930
geom_path,
3031
geom_point,
3132
geom_point_3d,
33+
geom_qq,
34+
geom_qq_line,
3235
geom_range,
3336
geom_ribbon,
3437
geom_rug,
@@ -78,7 +81,10 @@
7881
stat_density,
7982
stat_ecdf,
8083
stat_fanchart,
84+
stat_function,
8185
stat_identity,
86+
stat_qq,
87+
stat_qq_line,
8288
stat_smooth,
8389
stat_stl,
8490
stat_summary,
@@ -180,6 +186,9 @@
180186
"geom_stl",
181187
"geom_acf",
182188
"geom_pacf",
189+
"geom_norm",
190+
"geom_qq",
191+
"geom_qq_line",
183192
"scale_x_continuous",
184193
"scale_y_continuous",
185194
"scale_color_manual",
@@ -206,6 +215,9 @@
206215
"stat_summary",
207216
"stat_contour",
208217
"stat_fanchart",
218+
"stat_function",
219+
"stat_qq",
220+
"stat_qq_line",
209221
"stat_stl",
210222
"data",
211223
"map_data",

ggplotly/geoms/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,11 @@
2020
from .geom_line import geom_line
2121
from .geom_lines import geom_lines
2222
from .geom_map import geom_map, geom_sf
23+
from .geom_norm import geom_norm
2324
from .geom_pacf import geom_pacf
2425
from .geom_path import geom_path
26+
from .geom_qq import geom_qq
27+
from .geom_qq_line import geom_qq_line
2528
from .geom_point import geom_point
2629
from .geom_point_3d import geom_point_3d
2730
from .geom_range import geom_range
@@ -79,4 +82,7 @@
7982
"geom_stl",
8083
"geom_acf",
8184
"geom_pacf",
85+
"geom_norm",
86+
"geom_qq",
87+
"geom_qq_line",
8288
]

ggplotly/geoms/geom_norm.py

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
# geoms/geom_norm.py
2+
3+
import numpy as np
4+
import plotly.graph_objects as go
5+
6+
from .geom_base import Geom
7+
8+
9+
class geom_norm(Geom):
    """
    Geom for overlaying a normal distribution curve.

    Fits a normal distribution to the column mapped to ``x`` (sample mean and
    sample standard deviation) unless ``mean`` and/or ``sd`` are supplied
    explicitly. Useful for comparing the observed distribution of the data
    to a theoretical normal distribution.

    The curve is evaluated over the observed data range, extended by 5% of
    that range on each side.

    Parameters
    ----------
    data : DataFrame, optional
        Data for the geom (overrides plot data).
    mapping : aes, optional
        Aesthetic mappings. The x aesthetic selects the column that
        determines the curve's range and, by default, its mean and sd.
    mean : float, optional
        Mean of the normal distribution. If None, computed from data.
    sd : float, optional
        Standard deviation of the normal distribution. If None, computed
        from data. Must be positive and finite when given.
    scale : str, optional
        Output scale: 'density' (default) outputs PDF values, 'count' scales
        the PDF to histogram counts (PDF * number of observations * binwidth).
        Any other value is treated like 'density'.
    binwidth : float, optional
        Bin width for count scaling. If None, estimated as data_range / 30.
    n : int, optional
        Number of points for the curve. Default is 101.
    color : str, optional
        Color of the line. Default is 'red'.
    size : float, optional
        Width of the line. Default is 2.
    linetype : str, optional
        Line style ('solid', 'dash', etc.). Default is 'solid'.
        Unrecognised names fall back to 'solid'.

    Examples
    --------
    >>> # With density-scaled histogram (default)
    >>> ggplot(df, aes(x='x')) + geom_histogram(aes(y=after_stat('density'))) + geom_norm()

    >>> # With count histogram (no density scaling needed on histogram)
    >>> ggplot(df, aes(x='x')) + geom_histogram(bins=30) + geom_norm(scale='count')

    >>> # Explicit parameters
    >>> ggplot(df, aes(x='x')) + geom_histogram(bins=30) + geom_norm(scale='count', mean=0, sd=1)

    >>> # Styled
    >>> ggplot(df, aes(x='x')) + geom_histogram(aes(y=after_stat('density'))) + geom_norm(color='blue', size=3)
    """

    default_params = {
        "n": 101,
        "color": "red",
        "size": 2,
        "linetype": "solid",
        "scale": "density",
    }

    # Accepted linetype names mapped to the Plotly ``line.dash`` value used
    # for the trace.
    _DASH_MAP = {
        'solid': 'solid',
        'dashed': 'dash',
        'dash': 'dash',
        'dotted': 'dot',
        'dot': 'dot',
        'longdash': 'longdash',
        'dashdot': 'dashdot',
        'twodash': 'dashdot',
    }

    def __init__(self, data=None, mapping=None, mean=None, sd=None,
                 scale="density", binwidth=None, **params):
        super().__init__(data, mapping, **params)
        self.mean = mean
        self.sd = sd
        self.scale = scale
        self.binwidth = binwidth

    def _draw_impl(self, fig, data, row, col):
        """
        Add the normal curve as a line trace to ``fig``.

        Nothing is drawn when the x column has no non-missing values, or when
        the standard deviation has to be derived from the data and is not a
        positive finite number (e.g. a single observation or a constant
        column); in those cases the PDF would be NaN everywhere.

        Parameters
        ----------
        fig : Figure
            Plotly figure object the trace is added to.
        data : DataFrame
            Data containing the column mapped to the x aesthetic.
        row : int
            Row position in subplot.
        col : int
            Column position in subplot.

        Raises
        ------
        ValueError
            If the x aesthetic is missing or not a column of ``data``, or if
            an explicitly supplied ``sd`` is not a positive finite number.
        """
        from scipy.stats import norm

        # Get parameters
        n = self.params.get("n", 101)
        color = self.params.get("color", "red")
        size = self.params.get("size", 2)
        linetype = self.params.get("linetype", "solid")

        # Get x column
        x_col = self.mapping.get('x') if self.mapping else None
        if x_col is None or x_col not in data.columns:
            raise ValueError("geom_norm requires x aesthetic")

        x_data = data[x_col].dropna()
        n_obs = len(x_data)
        if n_obs == 0:
            # No observations: there is no range to draw over and nothing to
            # fit, so skip instead of adding an all-NaN trace to the legend.
            return

        # Compute or use provided mean/sd
        mean = self.mean if self.mean is not None else x_data.mean()
        sd = self.sd if self.sd is not None else x_data.std()

        if not np.isfinite(sd) or sd <= 0:
            if self.sd is not None:
                # A caller-supplied value is a usage error worth surfacing.
                raise ValueError(
                    "geom_norm requires sd to be a positive, finite number"
                )
            # Data-derived sd is degenerate; the PDF would be NaN everywhere.
            return

        # Generate x range (extend beyond data range). x_range keeps the
        # un-padded width because the binwidth estimate below relies on it.
        x_min, x_max = x_data.min(), x_data.max()
        x_range = x_max - x_min
        x_min -= x_range * 0.05
        x_max += x_range * 0.05

        # Compute normal PDF
        x_vals = np.linspace(x_min, x_max, n)
        y_vals = norm.pdf(x_vals, mean, sd)

        # Scale to counts if requested
        if self.scale == 'count':
            # Estimate binwidth if not provided (default 30 bins like geom_histogram)
            binwidth = self.binwidth if self.binwidth is not None else x_range / 30
            y_vals = y_vals * n_obs * binwidth
            y_label = 'count'
        else:
            y_label = 'density'

        # Map linetype to Plotly dash
        dash = self._DASH_MAP.get(linetype, 'solid')

        # Add trace
        fig.add_trace(
            go.Scatter(
                x=x_vals,
                y=y_vals,
                mode='lines',
                line=dict(color=color, width=size, dash=dash),
                name=f'Normal(\u03bc={mean:.2f}, \u03c3={sd:.2f})',
                showlegend=True,
                hovertemplate=f'x: %{{x:.2f}}<br>{y_label}: %{{y:.4f}}<extra></extra>',
            ),
            row=row, col=col,
        )

ggplotly/geoms/geom_qq.py

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
# geoms/geom_qq.py
2+
3+
import plotly.graph_objects as go
4+
5+
from ..stats.stat_qq import stat_qq
6+
from .geom_base import Geom
7+
8+
9+
class geom_qq(Geom):
    """
    Geom that draws a Q-Q (quantile-quantile) plot as scatter markers.

    Sample quantiles are plotted against theoretical quantiles of a chosen
    distribution. The quantile computation is delegated to ``stat_qq``.
    By default the standard normal distribution is used.

    Parameters
    ----------
    distribution : scipy.stats distribution, optional
        A scipy.stats distribution object with a ppf method.
        Default is scipy.stats.norm.
    dparams : dict, optional
        Additional parameters to pass to the distribution's ppf method,
        e.g. {'df': 5} for a t-distribution. Defaults to an empty dict.
    color : str, optional
        Color of the points.
    size : float, optional
        Size of the points. Default is 8.
    alpha : float, optional
        Transparency level for the points. Default is 1.
    shape : str, optional
        Shape of the points.

    Aesthetics
    ----------
    sample : str (required)
        Column name containing the sample data to compare against
        the theoretical distribution.
    color : str, optional
        Grouping variable for colored points.
    group : str, optional
        Grouping variable for separate Q-Q plots.

    See Also
    --------
    geom_qq_line : Reference line for Q-Q plots
    stat_qq : Underlying stat for quantile computation

    Examples
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> from scipy import stats
    >>>
    >>> # Basic Q-Q plot against normal distribution
    >>> df = pd.DataFrame({'values': np.random.randn(100)})
    >>> (ggplot(df, aes(sample='values'))
    ...  + geom_qq())
    >>>
    >>> # Q-Q plot with reference line
    >>> (ggplot(df, aes(sample='values'))
    ...  + geom_qq()
    ...  + geom_qq_line())
    >>>
    >>> # Q-Q plot against t-distribution
    >>> (ggplot(df, aes(sample='values'))
    ...  + geom_qq(distribution=stats.t, dparams={'df': 5}))
    >>>
    >>> # Q-Q plot with color grouping
    >>> df = pd.DataFrame({
    ...     'values': np.concatenate([np.random.randn(50), np.random.randn(50) + 2]),
    ...     'group': ['A'] * 50 + ['B'] * 50
    ... })
    >>> (ggplot(df, aes(sample='values', color='group'))
    ...  + geom_qq())
    """

    default_params = {"size": 8}

    def __init__(self, data=None, mapping=None, distribution=None, dparams=None, **params):
        super().__init__(data, mapping, **params)
        self.distribution = distribution
        # A fresh dict per instance avoids sharing one mutable default.
        self.dparams = {} if dparams is None else dparams

    def draw(self, fig, data=None, row=1, col=1):
        """
        Run ``stat_qq`` on the data, then render the resulting points.

        The stat is built here, at draw time, from the geom's current
        mapping rather than in ``__init__``.

        NOTE(review): this method rebinds ``self.mapping`` to the mapping
        returned by ``stat_qq.compute``; a later ``draw`` call on the same
        instance would build the stat from that transformed mapping.
        Confirm this is intended.

        Parameters
        ----------
        fig : Figure
            Plotly figure object.
        data : DataFrame, optional
            Data to plot; falls back to the geom's own data when None.
        row : int
            Row position in subplot.
        col : int
            Column position in subplot.
        """
        if data is None:
            data = self.data

        quantile_stat = stat_qq(
            data=data,
            mapping=self.mapping,
            distribution=self.distribution,
            dparams=self.dparams,
        )

        # compute() returns both the transformed frame and the mapping that
        # describes it; the geom adopts that mapping for rendering.
        transformed, self.mapping = quantile_stat.compute(data)

        self._draw_impl(fig, transformed, row, col)

    def _draw_impl(self, fig, data, row, col):
        """
        Render the Q-Q points as a marker-only scatter trace.

        Parameters
        ----------
        fig : Figure
            Plotly figure object.
        data : DataFrame
            Data (already transformed by stat_qq).
        row : int
            Row position in subplot.
        col : int
            Column position in subplot.
        """
        trace_kwargs = {
            "mode": "markers",
            "name": self.params.get("name", "Q-Q"),
        }

        # Which trace property each aesthetic is routed to.
        aesthetic_targets = {
            "color": "marker_color",
            "size": "marker_size",
            "shape": "marker_symbol",
        }

        self._transform_fig(
            go.Scatter, fig, data, trace_kwargs, aesthetic_targets, row, col
        )

0 commit comments

Comments
 (0)