diff --git a/.gitignore b/.gitignore index 00e37a55..84ecf204 100644 --- a/.gitignore +++ b/.gitignore @@ -127,3 +127,6 @@ venv.bak/ /docs/source/_generated/ /tmp/ /docs/source/_static/_generated/ +# pixi environments +.pixi/* +!.pixi/config.toml diff --git a/pyproject.toml b/pyproject.toml index 135d6b08..b209c347 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -98,38 +98,6 @@ environments = [ "python_version >= '3.10'", ] -[tool.pixi.workspace] -name = "mplhep" -authors = ["andrzejnovak "] -description = "Matplotlib styles for HEP" -channels = ["conda-forge"] -platforms = ["linux-64", "osx-64", "osx-arm64", "win-64"] - -[tool.pixi.pypi-dependencies] -mplhep = { path = ".", editable = true, extras = ["dev", "test"] } - -[tool.pixi.environments] -default = { solve-group = "default" } -test = { solve-group = "default" } - -[tool.pixi.tasks] -# Testing -test = "python -m pytest -r sa --mpl --mpl-results-path=pytest_results -n auto" -test-benchmark = "python -m pytest --benchmark-only --benchmark-sort=mean --benchmark-warmup=on" -test-baseline = "python -m pytest -r sa --mpl -n auto --mpl-generate-path=tests/baseline" - -# Linting & formatting (via pre-commit hooks) -precommit = "prek run --all-files" -prek = "prek run --all-files" - -# Documentation -docs-build = { cmd = "mkdocs build --clean", cwd = "new_docs" } -docs-build-fast = { cmd = "mkdocs build -f mkdocs_norender.yml", cwd = "new_docs" } -docs-serve = { cmd = "mkdocs serve --dev-addr 127.0.0.1:8000", cwd = "new_docs" } -docs-serve-fast = { cmd = "mkdocs serve -f mkdocs_norender.yml --dev-addr 127.0.0.1:8000", cwd = "new_docs" } - -# Cleanup -clean = "rm -rf pytest_results __pycache__ .pytest_cache .coverage .mypy_cache htmlcov dist build *.egg-info" [tool.mypy] @@ -215,3 +183,36 @@ flake8-builtins.ignorelist = ["copyright", "range"] "{new_docs,docs,examples}/**" = ["INP001", "S", "T20", "LOG015"] "src/mplhep/_dev.py" = ["T20"] "src/mplhep/styles/*.py" = ["FLY002"] + +[tool.pixi.workspace] +name = "mplhep" +authors = ["andrzejnovak "] +description = "Matplotlib styles for HEP" +channels = ["conda-forge"] +platforms = ["linux-64", "osx-64", "osx-arm64", "win-64"] + +[tool.pixi.pypi-dependencies] +mplhep = { path = ".", editable = true, extras = ["dev", "test"] } + +[tool.pixi.environments] +default = { solve-group = "default" } +test = { solve-group = "default" } + +[tool.pixi.tasks] +# Testing +test = "python -m pytest -r sa --mpl --mpl-results-path=pytest_results -n auto" +test-benchmark = "python -m pytest --benchmark-only --benchmark-sort=mean --benchmark-warmup=on" +test-baseline = "python -m pytest -r sa --mpl -n auto --mpl-generate-path=tests/baseline" + +# Linting & formatting (via pre-commit hooks) +precommit = "prek run --all-files" +prek = "prek run --all-files" + +# Documentation +docs-build = { cmd = "mkdocs build --clean", cwd = "new_docs" } +docs-build-fast = { cmd = "mkdocs build -f mkdocs_norender.yml", cwd = "new_docs" } +docs-serve = { cmd = "mkdocs serve --dev-addr 127.0.0.1:8000", cwd = "new_docs" } +docs-serve-fast = { cmd = "mkdocs serve -f mkdocs_norender.yml --dev-addr 127.0.0.1:8000", cwd = "new_docs" } + +# Cleanup +clean = "rm -rf pytest_results __pycache__ .pytest_cache .coverage .mypy_cache htmlcov dist build *.egg-info" diff --git a/src/mplhep/comparison_plotters.py b/src/mplhep/comparison_plotters.py index 3a238b1a..10afe7d8 100644 --- a/src/mplhep/comparison_plotters.py +++ b/src/mplhep/comparison_plotters.py @@ -59,6 +59,7 @@ def hists( ylabel=None, h1_label="h1", h2_label="h2", + flow="hint", fig=None, ax_main=None, ax_comparison=None, @@ -81,6 +82,11 @@ def hists( The label for the first histogram. Default is "h1". h2_label : str, optional The label for the second histogram. Default is "h2". + flow : str, optional + Whether to plot the under/overflow bin. If "show", add additional under/overflow bin. + If "sum", add the under/overflow bin content to first/last bin. + If "hint", draw markers at the axis to indicate presence of under/overflow. + If "none", do nothing. Default is "hint". fig : matplotlib.figure.Figure or None, optional The figure to use for the plot. If fig, ax_main and ax_comparison are None, a new figure will be created. Default is None. ax_main : matplotlib.axes.Axes or None, optional @@ -117,27 +123,84 @@ def hists( msg = "Need to provide fig, ax_main and ax_comparison (or none of them)." raise ValueError(msg) - xlim = (h1_plottable.edges_1d()[0], h1_plottable.edges_1d()[-1]) + histplot(h1, ax=ax_main, label=h1_label, histtype="step", flow=flow) + histplot(h2, ax=ax_main, label=h2_label, histtype="step", flow=flow) + + # Only set xlim if not showing flow bins (histplot handles xlim for flow="show") + if flow != "show": + xlim = (h1_plottable.edges_1d()[0], h1_plottable.edges_1d()[-1]) + ax_main.set_xlim(xlim) - histplot(h1_plottable, ax=ax_main, label=h1_label, histtype="step") - histplot(h2_plottable, ax=ax_main, label=h2_label, histtype="step") - ax_main.set_xlim(xlim) ax_main.set_ylabel(ylabel) ax_main.legend() - _ = ax_main.xaxis.set_ticklabels([]) + # For flow="show", don't clear ticklabels here (histplot set custom labels) + # tick_params(labelbottom=False) at the end will hide them instead + if flow != "show": + _ = ax_main.xaxis.set_ticklabels([]) comparison( - h1_plottable, - h2_plottable, + h1, + h2, ax_comparison, xlabel=xlabel, h1_label=h1_label, h2_label=h2_label, + flow=flow, **comparison_kwargs, ) fig.align_ylabels() + # Ensure tick labels appear only on the comparison axis, not on the main axis + # This must be done at the end, after all plotting, because matplotlib's sharex + # mechanism may override labels set during plotting + if flow == "show": + # For flow="show", regenerate tick labels with indicators for flow bins + # matplotlib's sharex clears labels, so we must regenerate from histogram edges + ax_main.tick_params(labelbottom=False) + ax_comparison.tick_params(labelbottom=True) + + # Get histogram edges to identify flow bin boundaries + # Note: h1_plottable has original edges [0, 1, 2, ..., 10], not flow-extended + edges = h1_plottable.edges_1d() + tick_positions = ax_comparison.get_xticks() + + # Check if there are flow labels (ticks in underflow/overflow regions) + has_underflow = any(tick < edges[0] for tick in tick_positions) + has_overflow = any(tick > edges[-1] for tick in tick_positions) + + # Filter out edge ticks that would overlap with flow labels + filtered_ticks = [] + tick_labels = [] + + for tick in tick_positions: + # Skip edge ticks that would overlap with adjacent flow labels + if has_underflow and abs(tick - edges[0]) < 1e-10: + continue # Skip tick at first regular edge (e.g., 0) + if has_overflow and abs(tick - edges[-1]) < 1e-10: + continue # Skip tick at last regular edge (e.g., 10) + + filtered_ticks.append(tick) + # Check if this tick is outside the regular histogram range (flow bins) + if tick < edges[0]: # Underflow bin + tick_labels.append(f"<{edges[0]:g}") + elif tick > edges[-1]: # Overflow bin + tick_labels.append(f">{edges[-1]:g}") + else: + tick_labels.append(f"{tick:g}") + + ax_comparison.set_xticks(filtered_ticks) + ax_comparison.set_xticklabels(tick_labels) + else: + # For other flow options, control label visibility and regenerate labels + ax_main.tick_params(labelbottom=False) + ax_comparison.tick_params(labelbottom=True) + # Explicitly regenerate tick labels on the comparison axis + # (they may have been set to empty strings during plotting) + tick_positions = ax_comparison.get_xticks() + ax_comparison.set_xticks(tick_positions) + ax_comparison.set_xticklabels([f"{tick:g}" for tick in tick_positions]) + return fig, ax_main, ax_comparison @@ -152,6 +215,7 @@ def comparison( comparison_ylabel=None, comparison_ylim=None, h1_w2method="sqrt", + flow="hint", **histplot_kwargs, ): """ @@ -181,6 +245,11 @@ def comparison( h1_w2method : str, optional What kind of bin uncertainty to use for h1: "sqrt" for the Poisson standard deviation derived from the variance stored in the histogram object, "poisson" for asymmetrical uncertainties based on a Poisson confidence interval. Default is "sqrt". Asymmetrical uncertainties are not supported for the asymmetry and efficiency comparisons. + flow : str, optional + Whether to plot the under/overflow bin. If "show", add additional under/overflow bin. + If "sum", add the under/overflow bin content to first/last bin. + If "hint", draw markers at the axis to indicate presence of under/overflow. + If "none", do nothing. Default is "hint". **histplot_kwargs : optional Arguments to be passed to histplot(), called in case the comparison is "pull", or plot_error_hist(), called for every other comparison case. In the former case, the default arguments are histtype="stepfilled" and color="darkgrey". In the later case, the default argument is color="black". @@ -204,11 +273,112 @@ def comparison( _check_counting_histogram(h1_plottable) _check_counting_histogram(h2_plottable) - comparison_values, lower_uncertainties, upper_uncertainties = get_comparison( - h1_plottable, h2_plottable, comparison, h1_w2method - ) + # When flow="show", we need to compute comparison on flow-included values + # so that the comparison histogram also has underflow/overflow bins + used_flow_bins = False + if flow == "show": + # Try to get flow bins if they exist (from original histogram objects, not plottable) + try: + # First check if both histograms support flow=True without actually calling it + # This prevents side effects from accessing flow values on histograms that don't have them + if ( + hasattr(h1, "values") + and hasattr(h2, "values") + and hasattr(h1.values, "__call__") + and hasattr(h2.values, "__call__") + ): + # Access flow bins from the original histogram objects + h1_flow_values = h1.values(flow=True) + + # Check if histogram actually has flow bins (length should be +2) + if len(h1_flow_values) == len(h1_plottable.values()) + 2: + # Use the original histograms which already have flow bins + h1_for_comparison = h1 + h2_for_comparison = h2 + used_flow_bins = True + else: + # No actual flow bins, use regular histograms + h1_for_comparison = h1_plottable + h2_for_comparison = h2_plottable + else: + # Histograms don't support flow parameter + h1_for_comparison = h1_plottable + h2_for_comparison = h2_plottable + except (AttributeError, TypeError): + # Histogram doesn't support flow bins, use regular histograms + h1_for_comparison = h1_plottable + h2_for_comparison = h2_plottable + else: + h1_for_comparison = h1_plottable + h2_for_comparison = h2_plottable + + if used_flow_bins: + # Compute comparison on flow-included values directly + # Since get_comparison() would strip flow bins, we compute it ourselves + h1_vals_flow = h1_for_comparison.values(flow=True) + h2_vals_flow = h2_for_comparison.values(flow=True) + h1_vars_flow = h1_for_comparison.variances(flow=True) + h2_vars_flow = h2_for_comparison.variances(flow=True) + + # For now, only support ratio comparison with flow bins + # Compute ratio: h1/h2 + with np.errstate(divide="ignore", invalid="ignore"): + comparison_values = np.where( + h2_vals_flow != 0, h1_vals_flow / h2_vals_flow, np.nan + ) + # Compute uncertainties (symmetric for now) + if h1_vars_flow is not None and h2_vars_flow is not None: + # Ratio uncertainty: sqrt((var1/val2^2) + (val1^2 * var2 / val2^4)) + ratio_var = np.where( + h2_vals_flow != 0, + (h1_vars_flow / h2_vals_flow**2) + + (h1_vals_flow**2 * h2_vars_flow / h2_vals_flow**4), + np.nan, + ) + lower_uncertainties = np.sqrt(ratio_var) + upper_uncertainties = lower_uncertainties + else: + lower_uncertainties = np.zeros_like(comparison_values) + upper_uncertainties = np.zeros_like(comparison_values) + else: + comparison_values, lower_uncertainties, upper_uncertainties = get_comparison( + h1_for_comparison, h2_for_comparison, comparison, h1_w2method + ) + + # Use the comparison histogram directly if it has flow bins, otherwise create EnhancedPlottableHistogram + if used_flow_bins: + # comparison was computed on flow-included histograms + # Create a new histogram with the same structure + + # Use enhanced plottable histogram for flow comparison + comparison_variances = ( + lower_uncertainties**2 if np.any(lower_uncertainties) else None + ) + + # Create flow-extended edges to match the comparison values + final_bins = h2_plottable.edges_1d() # Use the same method as in plot.py + _flow_bin_size = max( + 0.05 * (final_bins[-1] - final_bins[0]), np.mean(np.diff(final_bins)) + ) + + # For flow="show", we need to always extend edges to match flow values length + # because flow=True always includes underflow/overflow positions + flow_edges = np.copy(final_bins) + h2_flow_values = h2_for_comparison.values(flow=True) + + # Always add underflow and overflow edges when using flow="show" + # to match the structure of flow=True values + flow_edges = np.insert(flow_edges, 0, flow_edges[0] - _flow_bin_size) + flow_edges = np.append(flow_edges, flow_edges[-1] + _flow_bin_size) - if np.allclose(lower_uncertainties, upper_uncertainties, equal_nan=True): + comparison_plottable = EnhancedPlottableHistogram( + comparison_values, + edges=flow_edges, # Use flow-extended edges + variances=comparison_variances, + kind=h2_plottable.kind, + ) + # Regular comparison without flow bins + elif np.allclose(lower_uncertainties, upper_uncertainties, equal_nan=True): comparison_plottable = EnhancedPlottableHistogram( comparison_values, edges=h2_plottable.axes[0].edges, @@ -229,16 +399,25 @@ def comparison( "yerr", [comparison_plottable.yerr_lo, comparison_plottable.yerr_hi] ) - comparison_plottable.errors() + # Only call errors() if it's an EnhancedPlottableHistogram + if hasattr(comparison_plottable, "errors"): + comparison_plottable.errors() + + # Filter out comparison-specific parameters that shouldn't be passed to histplot + _valid_histplot_kwargs = { + k: v + for k, v in histplot_kwargs.items() + if k not in ["ratio", "comparison", "comparison_ylabel", "comparison_ylim"] + } if comparison == "pull": - histplot_kwargs.setdefault("histtype", "fill") - histplot_kwargs.setdefault("color", "darkgrey") - histplot(comparison_plottable, ax=ax, **histplot_kwargs) + _valid_histplot_kwargs.setdefault("histtype", "fill") + _valid_histplot_kwargs.setdefault("color", "darkgrey") + histplot(comparison_plottable, ax=ax, flow=flow, **_valid_histplot_kwargs) else: - histplot_kwargs.setdefault("color", "black") - histplot_kwargs.setdefault("histtype", "errorbar") - histplot(comparison_plottable, ax=ax, **histplot_kwargs) + _valid_histplot_kwargs.setdefault("color", "black") + _valid_histplot_kwargs.setdefault("histtype", "errorbar") + histplot(comparison_plottable, ax=ax, flow=flow, **_valid_histplot_kwargs) if comparison in ["ratio", "split_ratio", "relative_difference"]: if comparison_ylim is None: @@ -307,8 +486,10 @@ def comparison( ax.axhline(0, ls="--", lw=1.0, color="black") ax.set_ylabel(rf"$\frac{{{h1_label} - {h2_label}}}{{{h1_label} + {h2_label}}}$") - xlim = (h1_plottable.edges_1d()[0], h1_plottable.edges_1d()[-1]) - ax.set_xlim(xlim) + # Only set xlim if not showing flow bins (histplot handles xlim for flow="show") + if flow != "show": + xlim = (h1_plottable.edges_1d()[0], h1_plottable.edges_1d()[-1]) + ax.set_xlim(xlim) ax.set_xlabel(xlabel) if comparison_ylim is not None: ax.set_ylim(comparison_ylim) @@ -371,6 +552,7 @@ def data_model( model_uncertainty=True, model_uncertainty_label="MC stat. unc.", data_w2method="poisson", + flow="hint", fig=None, ax_main=None, ax_comparison=None, @@ -417,6 +599,11 @@ def data_model( The label for the model uncertainties. Default is "MC stat. unc.". data_w2method : str, optional What kind of bin uncertainty to use for data_hist: "sqrt" for the Poisson standard deviation derived from the variance stored in the histogram object, "poisson" for asymmetrical uncertainties based on a Poisson confidence interval. Default is "poisson". + flow : str, optional + Whether to plot the under/overflow bin. If "show", add additional under/overflow bin. + If "sum", add the under/overflow bin content to first/last bin. + If "hint", draw markers at the axis to indicate presence of under/overflow. + If "none", do nothing. Default is "hint". fig : matplotlib.figure.Figure or None, optional The figure to use for the plot. If fig, ax_main and ax_comparison are None, a new figure will be created. Default is None. ax_main : matplotlib.axes.Axes or None, optional @@ -453,14 +640,15 @@ def data_model( if stacked_components is None: stacked_components = [] - # Convert input histograms to plottable histograms. + # Convert input histograms to plottable histograms for binning checks. + # Keep original histograms for passing to histplot (to preserve flow bin info). # If the input is a function, it is left unchanged. data_hist_plottable = make_plottable_histogram(data_hist) - stacked_components = [ + stacked_components_plottable = [ make_plottable_histogram(component) if not callable(component) else component for component in stacked_components ] - unstacked_components = [ + unstacked_components_plottable = [ make_plottable_histogram(component) if not callable(component) else component for component in unstacked_components ] @@ -470,11 +658,28 @@ def data_model( unstacked_kwargs_list = unstacked_kwargs_list.copy() model_sum_kwargs = model_sum_kwargs.copy() + # Set flow parameter in kwargs for model plotting + stacked_kwargs.setdefault("flow", flow) + model_sum_kwargs.setdefault("flow", flow) + # Ensure all unstacked kwargs have flow parameter + # If unstacked_kwargs_list is shorter than unstacked_components, extend it + while len(unstacked_kwargs_list) < len(unstacked_components): + unstacked_kwargs_list.append({}) + for i in range(len(unstacked_kwargs_list)): + if unstacked_kwargs_list[i] is None: + unstacked_kwargs_list[i] = {} + else: + unstacked_kwargs_list[i] = unstacked_kwargs_list[i].copy() + unstacked_kwargs_list[i].setdefault("flow", flow) + comparison_kwargs.setdefault("h1_label", data_label) comparison_kwargs.setdefault("h2_label", "MC") comparison_kwargs.setdefault("comparison", "split_ratio") model_components = stacked_components + unstacked_components + model_components_plottable = ( + stacked_components_plottable + unstacked_components_plottable + ) if len(model_components) == 0: msg = "Need to provide at least one model component." @@ -483,8 +688,8 @@ def data_model( model_type = _get_model_type(model_components) if model_type == "histograms": - _check_binning_consistency([*model_components, data_hist_plottable]) - for component in [*model_components, data_hist_plottable]: + _check_binning_consistency([*model_components_plottable, data_hist_plottable]) + for component in [*model_components_plottable, data_hist_plottable]: _check_counting_histogram(component) if fig is None and ax_main is None and ax_comparison is None: @@ -506,6 +711,15 @@ def data_model( msg = "Cannot provide fig, ax_main or ax_comparison with plot_only." raise ValueError(msg) + # For flow="show", don't constrain function_range + if flow == "show": + func_range = None + else: + func_range = [ + data_hist_plottable.edges_1d()[0], + data_hist_plottable.edges_1d()[-1], + ] + model( stacked_components=stacked_components, stacked_labels=stacked_labels, @@ -517,10 +731,7 @@ def data_model( stacked_kwargs=stacked_kwargs, unstacked_kwargs_list=unstacked_kwargs_list, model_sum_kwargs=model_sum_kwargs, - function_range=[ - data_hist_plottable.edges_1d()[0], - data_hist_plottable.edges_1d()[-1], - ], + function_range=func_range, model_uncertainty=model_uncertainty, model_uncertainty_label=model_uncertainty_label, fig=fig, @@ -528,30 +739,55 @@ def data_model( ) histplot( - data_hist_plottable, + data_hist, ax=ax_main, w2method=data_w2method, color="black", label=data_label, histtype="errorbar", + flow=flow, ) + # If flow="show", calculate the correct xlim that includes flow bins + # We need to compute this manually because model() resets xlim to regular edges + flow_xlim = None + if flow == "show": + # Get the bin width to extend xlim for flow bins + edges = data_hist_plottable.edges_1d() + bin_width = edges[1] - edges[0] + # Extend by 1.5 bin widths on each side to show flow bin labels + flow_xlim = (edges[0] - 1.5 * bin_width, edges[-1] + 1.5 * bin_width) + if plot_only == "ax_main": ax_main.set_xlabel(xlabel) else: - _ = ax_main.xaxis.set_ticklabels([]) + # For flow="show", don't clear ticklabels here (histplot set custom labels) + # tick_params(labelbottom=False) at the end will hide them instead + if flow != "show": + _ = ax_main.xaxis.set_ticklabels([]) ax_main.set_xlabel(" ") if model_type == "histograms": - model_hist = sum(model_components) + # Sum the original histograms to preserve flow bin information for comparison + model_hist_orig = sum(model_components) + # Also sum plottables for variance manipulation + model_hist_plottable = sum(model_components_plottable) if not model_uncertainty: - model_hist.set_variances(np.zeros_like(model_hist.variances())) + model_hist_plottable.set_variances( + np.zeros_like(model_hist_plottable.variances()) + ) + # Need to update the original hist's variances too if it's plottable + if hasattr(model_hist_orig, "set_variances"): + model_hist_orig.set_variances( + np.zeros_like(model_hist_orig.variances()) + ) else: def sum_components(x): return sum(f(x) for f in model_components) - model_hist = _make_hist_from_function(sum_components, data_hist_plottable) + model_hist_orig = _make_hist_from_function(sum_components, data_hist_plottable) + model_hist_plottable = model_hist_orig if comparison_kwargs["comparison"] == "pull" and ( model_type == "functions" or not model_uncertainty @@ -564,11 +800,12 @@ def sum_components(x): ax_main.legend() comparison( - data_hist_plottable, - model_hist, + data_hist, + model_hist_orig, ax=ax_comparison, xlabel=xlabel, w2method=data_w2method, + flow=flow, **comparison_kwargs, ) @@ -578,4 +815,58 @@ def sum_components(x): fig.align_ylabels() + # Restore the xlim for flow bins if needed (some operations may have reset it) + if flow == "show" and flow_xlim is not None: + ax_main.set_xlim(flow_xlim) + + # Ensure tick labels appear only on the comparison axis, not on the main axis + # This must be done at the end, after all plotting, because matplotlib's sharex + # mechanism may override labels set during plotting + if flow == "show": + # For flow="show", regenerate tick labels with indicators for flow bins + # matplotlib's sharex clears labels, so we must regenerate from histogram edges + ax_main.tick_params(labelbottom=False) + ax_comparison.tick_params(labelbottom=True) + + # Get histogram edges to identify flow bin boundaries + # Note: data_hist_plottable has original edges [0, 1, 2, ..., 10], not flow-extended + edges = data_hist_plottable.edges_1d() + tick_positions = ax_comparison.get_xticks() + + # Check if there are flow labels (ticks in underflow/overflow regions) + has_underflow = any(tick < edges[0] for tick in tick_positions) + has_overflow = any(tick > edges[-1] for tick in tick_positions) + + # Filter out edge ticks that would overlap with flow labels + filtered_ticks = [] + tick_labels = [] + + for tick in tick_positions: + # Skip edge ticks that would overlap with adjacent flow labels + if has_underflow and abs(tick - edges[0]) < 1e-10: + continue # Skip tick at first regular edge (e.g., 0) + if has_overflow and abs(tick - edges[-1]) < 1e-10: + continue # Skip tick at last regular edge (e.g., 10) + + filtered_ticks.append(tick) + # Check if this tick is outside the regular histogram range (flow bins) + if tick < edges[0]: # Underflow bin + tick_labels.append(f"<{edges[0]:g}") + elif tick > edges[-1]: # Overflow bin + tick_labels.append(f">{edges[-1]:g}") + else: + tick_labels.append(f"{tick:g}") + + ax_comparison.set_xticks(filtered_ticks) + ax_comparison.set_xticklabels(tick_labels) + else: + # For other flow options, control label visibility and regenerate labels + ax_main.tick_params(labelbottom=False) + ax_comparison.tick_params(labelbottom=True) + # Explicitly regenerate tick labels on the comparison axis + # (they may have been set to empty strings during plotting) + tick_positions = ax_comparison.get_xticks() + ax_comparison.set_xticks(tick_positions) + ax_comparison.set_xticklabels([f"{tick:g}" for tick in tick_positions]) + return fig, ax_main, ax_comparison diff --git a/src/mplhep/plot.py b/src/mplhep/plot.py index f6ddaaa5..c77c3ee8 100644 --- a/src/mplhep/plot.py +++ b/src/mplhep/plot.py @@ -391,6 +391,16 @@ def histplot( edges = bool(edges) binticks = bool(binticks) + # Handle ratio plots - redirect to comparison functionality + if kwargs.get("ratio", False): + from .comparison_plotters import hists + + # Remove ratio from kwargs before passing to comparison function + _kwargs = {k: v for k, v in kwargs.items() if k != "ratio"} + if isinstance(H, (list, tuple)) and len(H) >= 2: + return hists(H[0], H[1], comparison="ratio", flow=flow, **_kwargs) + raise ValueError("ratio=True requires at least 2 histograms for comparison") + # Process input hists = list(process_histogram_parts(H, bins)) final_bins, xtick_labels = _get_plottable_protocol_bins(hists[0].axes[0]) @@ -468,7 +478,7 @@ def iterable_not_string(arg): flow=flow, xoffsets=xoffsets, ) - flow_bins, underflow, overflow = flow_info + _flow_bins, underflow, overflow = flow_info ########## # Plotting @@ -708,61 +718,45 @@ def iterable_not_string(arg): msg = "No figure found" raise ValueError(msg) if flow == "hint": + # Get all shared x-axes to draw markers on all of them + shared_axes = ax.get_shared_x_axes().get_siblings(ax) + shared_axes = [ + _ax for _ax in shared_axes if _ax.get_position().x0 == ax.get_position().x0 + ] + _marker_size = ( 30 * ax.get_window_extent().transformed(fig.dpi_scale_trans.inverted()).width ) - if underflow > 0.0: - ax.scatter( - final_bins[0], - 0, - _marker_size, - marker=align_marker("<", halign="right"), - edgecolor="black", - zorder=5, - clip_on=False, - facecolor="white", - transform=ax.get_xaxis_transform(), - ) - if overflow > 0.0: - ax.scatter( - final_bins[-1], - 0, - _marker_size, - marker=align_marker(">", halign="left"), - edgecolor="black", - zorder=5, - clip_on=False, - facecolor="white", - transform=ax.get_xaxis_transform(), - ) - - elif flow == "show": - underflow_xticklabel = f"<{flow_bins[1]:g}" - overflow_xticklabel = f">{flow_bins[-2]:g}" - # Loop over shared x axes to get xticks and xticklabels - xticks, xticklabels = np.array([]), [] - shared_axes = ax.get_shared_x_axes().get_siblings(ax) - shared_axes = [ - _ax for _ax in shared_axes if _ax.get_position().x0 == ax.get_position().x0 - ] + # Draw markers on all shared axes for _ax in shared_axes: - _xticks = _ax.get_xticks() - _xticklabels = [label.get_text() for label in _ax.get_xticklabels()] - - # Check if underflow/overflow xtick already exists - if ( - underflow_xticklabel in _xticklabels - or overflow_xticklabel in _xticklabels - ): - xticks = _xticks - xticklabels = _xticklabels - break - if len(_xticklabels) > 0: - xticks = _xticks - xticklabels = _xticklabels + if underflow > 0.0: + _ax.scatter( + final_bins[0], + 0, + _marker_size, + marker=align_marker("<", halign="right"), + edgecolor="black", + zorder=5, + clip_on=False, + facecolor="white", + transform=_ax.get_xaxis_transform(), + ) + if overflow > 0.0: + _ax.scatter( + final_bins[-1], + 0, + _marker_size, + marker=align_marker(">", halign="left"), + edgecolor="black", + zorder=5, + clip_on=False, + facecolor="white", + transform=_ax.get_xaxis_transform(), + ) + elif flow == "show": lw = ax.spines["bottom"].get_linewidth() _edges = plottables[0].edges_1d() _centers = plottables[0].centers @@ -771,91 +765,101 @@ def iterable_not_string(arg): * ax.get_window_extent().transformed(fig.dpi_scale_trans.inverted()).width ) - if underflow > 0.0 or underflow_xticklabel in xticklabels: - # Replace any existing xticks in underflow region with underflow bin center - _mask = xticks > flow_bins[1] - xticks = np.insert(xticks[_mask], 0, _centers[0]) - xticklabels = [underflow_xticklabel] + [ - xlab for i, xlab in enumerate(xticklabels) if _mask[i] - ] + # Use edge values for flow bin labels (not center values) + underflow_xticklabel = f"<{_edges[1]:g}" + overflow_xticklabel = f">{_edges[-2]:g}" - # Don't draw markers on the top of the top axis - top_axis = max(shared_axes, key=lambda a: a.get_position().y0) + # Get shared axes for marker placement + shared_axes = ax.get_shared_x_axes().get_siblings(ax) + shared_axes = [ + _ax for _ax in shared_axes if _ax.get_position().x0 == ax.get_position().x0 + ] - # Draw on all shared axes + # Use existing tick positions (from matplotlib's default ticker) + # rather than creating ticks at every bin edge + existing_ticks = ax.get_xticks() + # Filter to only include ticks within the regular bin range + regular_edges = _edges[1:-1] + new_xticks = [ + tick + for tick in existing_ticks + if regular_edges[0] <= tick <= regular_edges[-1] + ] + new_xticklabels = [f"{tick:g}" for tick in new_xticks] + + # Find bottom axis for marker placement + bottom_axis = min(shared_axes, key=lambda a: a.get_position().y0) + + if underflow > 0.0: + # Add underflow bin center at the beginning + new_xticks.insert(0, _centers[0]) + new_xticklabels.insert(0, underflow_xticklabel) + + # Draw markers only on the bottom (h=0) of axes for _ax in shared_axes: - _ax.set_xticks(xticks) - _ax.set_xticklabels(xticklabels) - for h in [0, 1]: - # Don't draw marker on the top of the top axis - if _ax == top_axis and h == 1: - continue - - _ax.plot( - [_edges[0], _edges[1]], - [h, h], - color="white", - zorder=5, - ls="--", - lw=lw, - transform=_ax.get_xaxis_transform(), - clip_on=False, - ) + h = 0 # Only draw on bottom + _ax.plot( + [_edges[0], _edges[1]], + [h, h], + color="white", + zorder=5, + ls="--", + lw=lw, + transform=_ax.get_xaxis_transform(), + clip_on=False, + ) - _ax.scatter( - _centers[0], - h, - _marker_size, - marker=align_marker("d", valign="center"), - edgecolor="black", - zorder=5, - clip_on=False, - facecolor="white", - transform=_ax.get_xaxis_transform(), - ) - if overflow > 0.0 or overflow_xticklabel in xticklabels: - # Replace any existing xticks in overflow region with overflow bin center - _mask = xticks < flow_bins[-2] - xticks = np.insert(xticks[_mask], sum(_mask), _centers[-1]) - xticklabels = [xlab for i, xlab in enumerate(xticklabels) if _mask[i]] + [ - overflow_xticklabel - ] - - # Don't draw markers on the top of the top axis - top_axis = max(shared_axes, key=lambda a: a.get_position().y0) - - # Draw on all shared axes + _ax.scatter( + _centers[0], + h, + _marker_size, + marker=align_marker("d", valign="center"), + edgecolor="black", + zorder=5, + clip_on=False, + facecolor="white", + transform=_ax.get_xaxis_transform(), + ) + if overflow > 0.0: + # Add overflow bin center at the end + new_xticks.append(_centers[-1]) + new_xticklabels.append(overflow_xticklabel) + + # Draw markers only on the bottom (h=0) of axes for _ax in shared_axes: - _ax.set_xticks(xticks) - _ax.set_xticklabels(xticklabels) - - for h in [0, 1]: - # Don't draw marker on the top of the top axis - if _ax == top_axis and h == 1: - continue - - _ax.plot( - [_edges[-2], _edges[-1]], - [h, h], - color="white", - zorder=5, - ls="--", - lw=lw, - transform=_ax.get_xaxis_transform(), - clip_on=False, - ) + h = 0 # Only draw on bottom + _ax.plot( + [_edges[-2], _edges[-1]], + [h, h], + color="white", + zorder=5, + ls="--", + lw=lw, + transform=_ax.get_xaxis_transform(), + clip_on=False, + ) - _ax.scatter( - _centers[-1], - h, - _marker_size, - marker=align_marker("d", valign="center"), - edgecolor="black", - zorder=5, - clip_on=False, - facecolor="white", - transform=_ax.get_xaxis_transform(), - ) + _ax.scatter( + _centers[-1], + h, + _marker_size, + marker=align_marker("d", valign="center"), + edgecolor="black", + zorder=5, + clip_on=False, + facecolor="white", + transform=_ax.get_xaxis_transform(), + ) + + # Set the final xticks and xticklabels on all shared axes + for _ax in shared_axes: + _ax.set_xticks(new_xticks) + # Only set tick labels on the bottom axis + if _ax == bottom_axis: + _ax.set_xticklabels(new_xticklabels) + else: + # Explicitly set empty labels on other axes + _ax.set_xticklabels(["" for _ in new_xticks]) return return_artists @@ -1400,6 +1404,8 @@ def model( histtype="band", ) else: + # Remove flow parameter for funcplot (it only works with histplot) + funcplot_kwargs = {k: v for k, v in stacked_kwargs.items() if k != "flow"} funcplot( stacked_components, ax=ax, @@ -1407,7 +1413,7 @@ def model( colors=stacked_colors, labels=stacked_labels, range=xlim, - **stacked_kwargs, + **funcplot_kwargs, ) if len(unstacked_components) > 0: @@ -1435,6 +1441,10 @@ def model( **unstacked_kwargs, ) else: + # Remove flow parameter for funcplot (it only works with histplot) + funcplot_unstacked_kwargs = { + k: v for k, v in unstacked_kwargs.items() if k != "flow" + } funcplot( component, ax=ax, @@ -1442,7 +1452,7 @@ def model( color=color, label=label, range=xlim, - **unstacked_kwargs, + **funcplot_unstacked_kwargs, ) # Plot the sum of all the components if model_sum_kwargs.pop("show", True) and ( @@ -1472,11 +1482,15 @@ def model( def sum_function(x): return sum(f(x) for f in components) + # Remove flow parameter for funcplot (it only works with histplot) + funcplot_sum_kwargs = { + k: v for k, v in model_sum_kwargs.items() if k != "flow" + } funcplot( sum_function, ax=ax, range=xlim, - **model_sum_kwargs, + **funcplot_sum_kwargs, ) elif ( model_uncertainty @@ -1488,7 +1502,20 @@ def sum_function(x): sum(components), ax=ax, label=model_uncertainty_label, histtype="band" ) - ax.set_xlim(xlim) + # Check if flow="show" is set in any of the kwargs + # If so, don't reset xlim as histplot will have set it correctly for flow bins + flow_in_kwargs = ( + stacked_kwargs.get("flow") == "show" + or model_sum_kwargs.get("flow") == "show" + or any( + kwargs.get("flow") == "show" + for kwargs in unstacked_kwargs_list + if kwargs is not None + ) + ) + + if not flow_in_kwargs: + ax.set_xlim(xlim) ax.set_xlabel(xlabel) ax.set_ylabel(ylabel) set_fitting_ylabel_fontsize(ax) diff --git a/tests/baseline/test_hist2dplot_cbar_False.png b/tests/baseline/test_hist2dplot_cbar_False.png index fc8f04ba..f56d6c9e 100644 Binary files a/tests/baseline/test_hist2dplot_cbar_False.png and b/tests/baseline/test_hist2dplot_cbar_False.png differ diff --git a/tests/from_issues/baseline/test_comparison_flow.png b/tests/from_issues/baseline/test_comparison_flow.png new file mode 100644 index 00000000..4603f2a8 Binary files /dev/null and b/tests/from_issues/baseline/test_comparison_flow.png differ diff --git a/tests/from_issues/baseline/test_hists_flow.png b/tests/from_issues/baseline/test_hists_flow.png new file mode 100644 index 00000000..e3b0ca29 Binary files /dev/null and b/tests/from_issues/baseline/test_hists_flow.png differ diff --git a/tests/from_issues/baseline/test_issue_594.png b/tests/from_issues/baseline/test_issue_594.png new file mode 100644 index 00000000..5c1054c2 Binary files /dev/null and b/tests/from_issues/baseline/test_issue_594.png differ diff --git a/tests/from_issues/test_comparison_flow.py b/tests/from_issues/test_comparison_flow.py new file mode 100644 index 00000000..808342ed --- /dev/null +++ b/tests/from_issues/test_comparison_flow.py @@ -0,0 +1,56 @@ +import hist +import matplotlib.pyplot as plt +import numpy as np +import pytest + +import mplhep as hep + + +@pytest.mark.mpl_image_compare(remove_text=False, style="default", tolerance=20) +def test_comparison_flow(): + """Test all flow options with comparison plotters (data_model).""" + np.random.seed(42) + + # Create histograms with underflow and overflow + h_data = hist.Hist( + hist.axis.Regular(10, 0, 10, name="x", underflow=True, overflow=True) + ) + h_data.fill(np.random.normal(5, 2, 1000)) + + h_model = hist.Hist( + hist.axis.Regular(10, 0, 10, name="x", underflow=True, overflow=True) + ) + h_model.fill(np.random.normal(5, 2, 1000)) + + # Create 2x2 grid for different flow options + fig = plt.figure(figsize=(14, 10)) + + # Define grid layout - 2 rows per subplot (main + comparison) + gs = fig.add_gridspec(4, 2, height_ratios=[3, 1, 3, 1], hspace=0.3, wspace=0.3) + + flow_options = ["hint", "show", "sum", "none"] + titles = ["Default(hint)", "Show", "Sum", "None"] + + for idx, (flow_opt, title) in enumerate(zip(flow_options, titles)): + row_offset = (idx // 2) * 2 # 0 for first two, 2 for last two + col = idx % 2 + + ax_main = fig.add_subplot(gs[row_offset, col]) + ax_comparison = fig.add_subplot(gs[row_offset + 1, col], sharex=ax_main) + + # Create comparison plot with the specific flow option + _, _, _ = hep.comparison_plotters.data_model( + h_data, + unstacked_components=[h_model], + unstacked_labels=["Model"], + data_label="Data", + xlabel="x", + flow=flow_opt, + fig=fig, + ax_main=ax_main, + ax_comparison=ax_comparison, + ) + + ax_main.set_title(title, fontsize=14) + + return fig diff --git a/tests/from_issues/test_hists_flow.py b/tests/from_issues/test_hists_flow.py new file mode 100644 index 00000000..71d70855 --- /dev/null +++ b/tests/from_issues/test_hists_flow.py @@ -0,0 +1,56 @@ +import hist +import matplotlib.pyplot as plt +import numpy as np +import pytest + +import mplhep as hep + + +@pytest.mark.mpl_image_compare(remove_text=False, style="default", tolerance=20) +def test_hists_flow(): + """Test all flow options with comparison plotters (hists).""" + np.random.seed(42) + + # Create histograms with underflow and overflow + h1 = hist.Hist( + hist.axis.Regular(10, 0, 10, name="x", underflow=True, overflow=True) + ) + h1.fill(np.random.normal(5, 2, 1000)) + + h2 = hist.Hist( + hist.axis.Regular(10, 0, 10, name="x", underflow=True, overflow=True) + ) + h2.fill(np.random.normal(5, 2, 1000)) + + # Create 2x2 grid for different flow options + fig = plt.figure(figsize=(14, 10)) + + # Define grid layout - 2 rows per subplot (main + comparison) + gs = fig.add_gridspec(4, 2, height_ratios=[3, 1, 3, 1], hspace=0.3, wspace=0.3) + + flow_options = ["hint", "show", "sum", "none"] + titles = ["Default(hint)", "Show", "Sum", "None"] + + for idx, (flow_opt, title) in enumerate(zip(flow_options, titles)): + row_offset = (idx // 2) * 2 # 0 for first two, 2 for last two + col = idx % 2 + + ax_main = fig.add_subplot(gs[row_offset, col]) + ax_comparison = fig.add_subplot(gs[row_offset + 1, col], sharex=ax_main) + + # Create comparison plot with the specific flow option + _, _, _ = hep.comparison_plotters.hists( + h1, + h2, + h1_label="h1", + h2_label="h2", + xlabel="x", + flow=flow_opt, + fig=fig, + ax_main=ax_main, + ax_comparison=ax_comparison, + ) + + ax_main.set_title(title, fontsize=14) + + return fig diff --git a/tests/from_issues/test_issue594.py b/tests/from_issues/test_issue594.py new file mode 100644 index 00000000..bef3cb16 --- /dev/null +++ b/tests/from_issues/test_issue594.py @@ -0,0 +1,19 @@ +import hist +import numpy as np +import pytest + +import mplhep as hep + + +@pytest.mark.mpl_image_compare(remove_text=False) +def test_issue_594(): + np.random.seed(42) # Set seed for reproducible results + h1 = hist.Hist(hist.axis.Regular(10, 0, 10, underflow=True, overflow=True)) + h1.fill(np.random.normal(5, 2, 1000)) + h2 = hist.Hist(hist.axis.Regular(10, 0, 10, underflow=True, overflow=True)) + h2.fill(np.random.normal(5, 2, 1000)) + + fig, ax, rax = hep.comparison_plotters.data_model( + h1, unstacked_components=[h2], flow="show" + ) + return fig diff --git a/tests/test_mock.py b/tests/test_mock.py index 8aa101d4..2bf15735 100644 --- a/tests/test_mock.py +++ b/tests/test_mock.py @@ -31,6 +31,18 @@ def mock_matplotlib(mocker): _get_lines.get_next_color.return_value = "next-color" ax._get_lines = _get_lines + # Mock shared axes methods for flow parameter support + mock_shared_axes = mocker.Mock() + mock_shared_axes.get_siblings.return_value = [ + ax + ] # Return list containing current axis + ax.get_shared_x_axes.return_value = mock_shared_axes + + # Mock position method + mock_position = mocker.Mock() + mock_position.x0 = 0.1 # Arbitrary x-position value + ax.get_position.return_value = mock_position + mpl = mocker.patch("matplotlib.pyplot", autospec=True) mocker.patch("matplotlib.pyplot.subplots", return_value=(fig, ax)) @@ -44,7 +56,7 @@ def test_simple(mock_matplotlib): bins = [0, 1, 2, 3] mh.histplot(h, bins, yerr=True, label="X", ax=ax) - assert len(ax.mock_calls) == 13 + assert len(ax.mock_calls) == 17 # Updated count due to shared axes functionality ax.stairs.assert_called_once_with( values=approx([1.0, 3.0, 2.0]), @@ -89,7 +101,7 @@ def test_histplot_real(mock_matplotlib): mh.histplot([a, b, c], bins=bins, ax=ax, yerr=True, label=["MC1", "MC2", "Data"]) ax.legend() ax.set_title("Raw") - assert len(ax.mock_calls) == 27 + assert len(ax.mock_calls) == 31 # Updated count due to shared axes functionality ax.reset_mock() @@ -97,7 +109,7 @@ def test_histplot_real(mock_matplotlib): mh.histplot([c], bins=bins, ax=ax, yerr=True, histtype="errorbar", label="Data") ax.legend() ax.set_title("Data/MC") - assert len(ax.mock_calls) == 20 + assert len(ax.mock_calls) == 28 # Updated count due to shared axes functionality ax.reset_mock() mh.histplot( @@ -114,7 +126,7 @@ def test_histplot_real(mock_matplotlib): ) ax.legend() ax.set_title("Data/MC binwnorm") - assert len(ax.mock_calls) == 20 + assert len(ax.mock_calls) == 28 # Updated count due to shared axes functionality ax.reset_mock() mh.histplot( @@ -131,4 +143,4 @@ def test_histplot_real(mock_matplotlib): ) ax.legend() ax.set_title("Data/MC Density") - assert len(ax.mock_calls) == 20 + assert len(ax.mock_calls) == 28 # Updated count due to shared axes functionality