Skip to content

Commit 818363d

Browse files
committed
feat: add mplhep.hist() function to match plt.hist() API
Implements #511. Adds a new mplhep.hist() function that provides a convenient way to histogram raw data values while benefiting from the extended features of histplot, including automatic error bar calculation, bin-width normalization, and HEP-style plotting options. The function matches the plt.hist() API but passes data through np.histogram() and then to histplot() for rendering. Key features: single or multiple dataset support; weighted histograms with proper error propagation; density normalization; all histplot histtypes (step, fill, errorbar, band); custom or automatic binning. Tests are included for all functionality, with pytest-mpl visual regression tests to ensure consistent rendering.
1 parent face598 commit 818363d

File tree

6 files changed

+355
-7
lines changed

6 files changed

+355
-7
lines changed

src/mplhep/__init__.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
from .label import add_text, append_text, save_variations, savelabels
4848
from .plot import (
4949
funcplot,
50+
hist,
5051
hist2dplot,
5152
histplot,
5253
)
@@ -92,6 +93,7 @@
9293
"EnhancedPlottableHistogram",
9394
"_check_counting_histogram",
9495
"add_text",
96+
"add_text",
9597
"alice",
9698
"append_axes",
9799
"append_text",
@@ -108,10 +110,7 @@
108110
"get_plottables",
109111
"get_pull",
110112
"get_ratio",
111-
"plot_comparison",
112-
"plot_two_hist_comparison",
113-
"plot_model",
114-
"plot_data_model_comparison",
113+
"hist",
115114
"hist2dplot",
116115
# Log plot functions
117116
"histplot",
@@ -122,15 +121,18 @@
122121
"merge_legend_handles_labels",
123122
"mpl_magic",
124123
"plot",
124+
"plot_comparison",
125+
"plot_data_model_comparison",
126+
"plot_model",
127+
"plot_two_hist_comparison",
125128
"rescale_to_axessize",
126129
"save_variations",
127130
"savelabels",
131+
"set_fitting_ylabel_fontsize",
128132
"set_style",
133+
"set_ylow",
129134
"sort_legend",
130135
"style",
131-
"set_ylow",
132136
"yscale_anchored_text",
133137
"yscale_legend",
134-
"add_text",
135-
"set_fitting_ylabel_fontsize",
136138
]

src/mplhep/plot.py

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,166 @@ def soft_update_kwargs(kwargs, mods, rc=True):
8383

8484
########################################
8585
# Histogram plotter
86+
def hist(
    x,
    bins=10,
    *,
    range=None,
    density=False,
    weights=None,
    yerr: ArrayLike | bool | None = True,
    histtype: str = "step",
    label=None,
    ax: mpl.axes.Axes | None = None,
    **kwargs,
):
    """
    Histogram unbinned data and draw the result with `histplot`.

    The call signature follows `plt.hist`: the raw values are binned with
    `np.histogram` and the resulting counts and bin edges are forwarded to
    `histplot`, which does the actual drawing.

    Parameters
    ----------
    x : array-like or list of array-like
        Input values to histogram. A non-empty list/tuple whose first element
        is not a scalar is treated as several datasets, one histogram each.
        Anything else is flattened and treated as a single dataset.
    bins : int, str or sequence, default: 10
        Forwarded to `np.histogram_bin_edges`: a number of equal-width bins,
        the name of a binning estimator, or explicit bin edges. When several
        datasets are given, the edges are computed once from the pooled data
        so that all histograms share the same binning.
    range : tuple, optional
        Lower and upper range of the bins as (min, max). If not provided, the
        range spans the (pooled) data. Values outside the range are ignored.
        Has no effect when `bins` is a sequence of edges.
    density : bool, default: False
        Forwarded to `histplot`; `np.histogram` itself is always called with
        ``density=False``.
    weights : array-like or list of array-like, optional
        Weights for the entries of `x`. For several datasets this may be
        either a list with one weight array per dataset (each the same size
        as its dataset) or a single array that is applied to every dataset.
    yerr : array-like or bool, default: True
        Histogram uncertainties, forwarded to `histplot`. When `weights` are
        given and `yerr` is True or None, `yerr` is not forwarded; instead the
        per-bin sum of squared weights is passed as ``w2`` so that `histplot`
        derives the errors. When `yerr` is False or an explicit array it is
        forwarded unchanged and ``w2`` is withheld.
    histtype : {'step', 'fill', 'errorbar', 'band'}, default: "step"
        Type of histogram to plot (see `histplot` for details).
    label : str or list of str, optional
        Label(s) for legend entry.
    ax : matplotlib.axes.Axes, optional
        Axes object to plot on, forwarded to `histplot`.
    **kwargs
        Additional keyword arguments passed to `histplot`.

    Returns
    -------
    List[Hist1DArtists]
        Whatever `histplot` returns for the binned data.

    Examples
    --------
    >>> import mplhep as hep
    >>> import numpy as np
    >>> data = np.random.normal(100, 15, 1000)
    >>> hep.hist(data, bins=50, range=(50, 150))

    >>> # Multiple datasets
    >>> data1 = np.random.normal(100, 15, 1000)
    >>> data2 = np.random.normal(120, 15, 1000)
    >>> hep.hist([data1, data2], bins=50, label=['Dataset 1', 'Dataset 2'])

    See Also
    --------
    histplot : Plot pre-binned histograms
    matplotlib.pyplot.hist : Matplotlib histogram function

    """
    # The parameter is called `range` for plt.hist compatibility and therefore
    # shadows the builtin inside this function; use an unambiguous local name.
    hist_range = range

    # A non-empty list/tuple of non-scalars means "several datasets". The
    # length check keeps an empty list from raising IndexError on x[0].
    multiple = (
        isinstance(x, (list, tuple))
        and len(x) > 0
        and not isinstance(x[0], (int, float, np.number))
    )
    if multiple:
        datasets = [np.asarray(d).ravel() for d in x]
    else:
        datasets = [np.asarray(x).ravel()]

    # One set of edges for everything, computed from the pooled data. For an
    # integer `bins` this yields the same evenly spaced edges as
    # linspace(min, max, bins + 1), and it also copes with empty input,
    # all-equal values and string bin specifications.
    bin_edges = np.histogram_bin_edges(
        np.concatenate(datasets), bins=bins, range=hist_range
    )

    dataset_weights = _hist_weights_per_dataset(weights, datasets)

    counts = []
    sum_w2 = []
    for data, w in zip(datasets, dataset_weights):
        h, _ = np.histogram(data, bins=bin_edges, weights=w, density=False)
        counts.append(h)
        if w is not None:
            # Per-bin sum of squared weights, used for the error estimate.
            h_w2, _ = np.histogram(data, bins=bin_edges, weights=w**2, density=False)
            sum_w2.append(h_w2)

    if multiple:
        values = counts
        w2_arg = sum_w2 if sum_w2 else None
    else:
        values = counts[0]
        w2_arg = sum_w2[0] if sum_w2 else None

    yerr_arg = yerr
    if w2_arg is not None:
        if yerr is None or (isinstance(yerr, bool) and yerr):
            # Errors are to be derived from the weights: hand over w2 only.
            yerr_arg = None
        else:
            # yerr=False or explicit errors take precedence, so w2 is dropped.
            # NOTE(review): assumes histplot does not accept yerr together
            # with w2 - confirm against histplot's argument checks.
            w2_arg = None

    return histplot(
        values,
        bin_edges,
        yerr=yerr_arg,
        w2=w2_arg,
        density=density,
        histtype=histtype,
        label=label,
        ax=ax,
        **kwargs,
    )


def _hist_weights_per_dataset(weights, datasets):
    """Return one flat weight array (or None) for each array in `datasets`."""
    if weights is None:
        return [None] * len(datasets)

    # plt.hist-style input: one weight array per dataset, sizes matching.
    if (
        isinstance(weights, (list, tuple))
        and len(weights) == len(datasets)
        and all(
            np.ndim(w) > 0 and np.size(w) == d.size
            for w, d in zip(weights, datasets)
        )
    ):
        return [np.asarray(w).ravel() for w in weights]

    # Otherwise a single weight array is shared by every dataset.
    shared = np.asarray(weights).ravel()
    return [shared] * len(datasets)
244+
245+
86246
def histplot(
87247
H, # Histogram object, tuple or array
88248
bins=None, # Bins to be supplied when h is a value array or iterable of array
11.1 KB
Loading
20.8 KB
Loading
16.3 KB
Loading

0 commit comments

Comments
 (0)