Skip to content

Commit e44f5fa

Browse files
authored
Merge pull request #192 from GEECS-BELLA/Make-ScanPaths-attribute-of-ScanData-
Cleaner implementation of ScanData with additional simple visualization utilities
2 parents f22eae9 + 9b21cf1 commit e44f5fa

File tree

23 files changed

+3566
-986
lines changed

23 files changed

+3566
-986
lines changed

GEECS-Data-Utils/geecs_data_utils/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
Examples
1414
--------
1515
>>> from geecs_data_utils import ScanData, ScanPaths
16-
>>> scan_data = ScanData(year=2024, month=1, day=15, scan_number=42)
16+
>>> scan_paths = ScanData(year=2024, month=1, day=15, scan_number=42)
1717
"""
1818

1919
from geecs_data_utils.scan_data import ScanData
Lines changed: 296 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,296 @@
1+
"""
2+
Minimal plotting utilities for binned scan data.
3+
4+
This module provides helper functions to plot aggregated (binned) scalar data
5+
from scans using a frozen schema. It supports both single and multi-series
6+
errorbar plots with optional asymmetric errors and index-based x-axes.
7+
8+
Functions
9+
---------
10+
_get_center_and_err
11+
Extract center values and optional asymmetric errors for a column.
12+
_index_to_numeric
13+
Convert an index to numeric values, handling IntervalIndex specially.
14+
plot_binned
15+
Plot a single y-series versus x from binned data.
16+
plot_binned_multi
17+
Overlay multiple y-series versus the same x.
18+
"""
19+
20+
from __future__ import annotations
21+
from typing import Optional, Sequence, Tuple
22+
import numpy as np
23+
import pandas as pd
24+
import matplotlib.pyplot as plt
25+
26+
27+
def _get_center_and_err(
    binned: pd.DataFrame, col: str
) -> Tuple[np.ndarray, Optional[np.ndarray]]:
    """
    Look up the center values (and errors, when available) for one column.

    The lookup follows a frozen schema and never guesses:

    - MultiIndex columns: ``(col, 'center')`` must exist. Errors are returned
      only when both ``(col, 'err_low')`` and ``(col, 'err_high')`` exist.
    - Flat columns: ``col`` is returned as-is, without errors.

    Parameters
    ----------
    binned : pandas.DataFrame
        DataFrame of binned values, with either MultiIndex or flat columns.
    col : str
        Name of the column to extract.

    Returns
    -------
    y : numpy.ndarray
        Center values for ``col``.
    yerr : numpy.ndarray or None
        Array stacking ``err_low`` (row 0) on top of ``err_high`` (row 1)
        when both subcolumns are present, otherwise None.

    Raises
    ------
    KeyError
        If ``col`` (flat schema) or ``(col, 'center')`` (MultiIndex schema)
        is not among the columns.
    """
    columns = binned.columns

    # Flat schema: the column itself holds the values; there are no errors.
    if not isinstance(columns, pd.MultiIndex):
        if col not in columns:
            raise KeyError(f"Column '{col}' not found in binned_scalars.")
        return binned[col].to_numpy(), None

    # MultiIndex schema: 'center' is mandatory, no fallback guessing.
    center_key = (col, "center")
    if center_key not in columns:
        raise KeyError(f"'{col}' has no 'center' subcolumn in binned_scalars.")
    centers = binned[center_key].to_numpy()

    # Errors are only reported when both sides are available.
    low_key = (col, "err_low")
    high_key = (col, "err_high")
    if low_key not in columns or high_key not in columns:
        return centers, None

    errors = np.vstack([binned[low_key].to_numpy(), binned[high_key].to_numpy()])
    return centers, errors
78+
79+
80+
def _index_to_numeric(idx: pd.Index) -> np.ndarray:
    """
    Convert a pandas Index into numeric values.

    - If the index is an IntervalIndex, use the interval midpoints.
    - Otherwise, attempt direct float conversion.
    - If either conversion fails (e.g. string labels or datetime intervals),
      fall back to sequential integers ``0, 1, ..., len(idx) - 1``.

    Parameters
    ----------
    idx : pandas.Index
        Input index to convert.

    Returns
    -------
    values : numpy.ndarray
        Float array with one entry per index element.
    """
    try:
        if isinstance(idx, pd.IntervalIndex):
            # Midpoint of each bin. Kept inside the ``try`` so that intervals
            # whose bounds cannot be cast to float use the same positional
            # fallback as any other non-numeric index instead of raising.
            left = idx.left.astype(float)
            right = idx.right.astype(float)
            return ((left + right) / 2.0).to_numpy()
        return idx.to_numpy(dtype=float, copy=False)
    except (TypeError, ValueError):
        # Only conversion failures trigger the fallback; unrelated errors
        # are allowed to propagate rather than being silently swallowed.
        return np.arange(len(idx), dtype=float)
104+
105+
106+
def plot_binned(
    binned: pd.DataFrame,
    x_col: Optional[str],
    y_col: str,
    *,
    use_index_as_x: bool = False,
    ax: Optional[plt.Axes] = None,
    marker: str = "o",
    linestyle: str = "-",
    label: Optional[str] = None,
    xscale: str = "linear",
    yscale: str = "linear",
    grid: bool = True,
) -> plt.Axes:
    """
    Draw one binned y-series against x as an errorbar plot.

    Points whose x or y value is not finite are dropped, and the remaining
    points are sorted by x before plotting.

    Parameters
    ----------
    binned : pandas.DataFrame
        DataFrame of binned scalar values, with frozen schema.
    x_col : str or None
        Column providing the x-values. When None, or when
        `use_index_as_x=True`, the DataFrame index is used instead.
    y_col : str
        Column providing the y-values. Must exist as 'center' in the
        MultiIndex schema or as a flat column.
    use_index_as_x : bool, default=False
        If True, take x-values from the DataFrame index regardless of `x_col`.
    ax : matplotlib.axes.Axes, optional
        Axes to draw into. A new figure and axes are created when None.
    marker : str, default="o"
        Marker style, passed to ``errorbar`` as ``fmt``.
    linestyle : str, default="-"
        Line style for the plot.
    label : str, optional
        Label for the plotted series. A legend is added when it is non-empty.
    xscale : {"linear", "log"}, default="linear"
        X-axis scale type.
    yscale : {"linear", "log"}, default="linear"
        Y-axis scale type.
    grid : bool, default=True
        Whether to draw a grid.

    Returns
    -------
    ax : matplotlib.axes.Axes
        The matplotlib Axes containing the plot.

    Raises
    ------
    KeyError
        If requested columns are not found in the schema.
    """
    # Resolve y first so a missing y column is reported before anything else.
    y_vals, y_err = _get_center_and_err(binned, y_col)

    # Resolve x: either the (numeric) index or a named column.
    if use_index_as_x or x_col is None:
        x_vals = _index_to_numeric(binned.index)
        x_err = None
        x_name = binned.index.name or "bin"
    else:
        x_vals, x_err = _get_center_and_err(binned, x_col)
        x_name = x_col

    # Positions of the finite points, reordered so that x is ascending.
    # Selecting with this single index array masks and sorts in one step.
    finite_pos = np.flatnonzero(np.isfinite(x_vals) & np.isfinite(y_vals))
    picked = finite_pos[np.argsort(x_vals[finite_pos])]

    x_vals = x_vals[picked]
    y_vals = y_vals[picked]
    x_err = x_err[:, picked] if x_err is not None else None
    y_err = y_err[:, picked] if y_err is not None else None

    if ax is None:
        _, ax = plt.subplots()

    ax.errorbar(
        x_vals,
        y_vals,
        xerr=x_err,
        yerr=y_err,
        fmt=marker,
        linestyle=linestyle,
        label=label,
    )

    ax.set_xlabel(x_name)
    ax.set_ylabel(y_col)
    ax.set_xscale(xscale)
    ax.set_yscale(yscale)

    if grid:
        ax.grid(True, alpha=0.3)
    if label:
        ax.legend()

    return ax
201+
202+
203+
def plot_binned_multi(
    binned: pd.DataFrame,
    x_col: Optional[str],
    y_cols: Sequence[str],
    *,
    use_index_as_x: bool = False,
    labels: Optional[Sequence[str]] = None,
    ax: Optional[plt.Axes] = None,
    markers: Optional[Sequence[str]] = None,
    linestyles: Optional[Sequence[str]] = None,
    xscale: str = "linear",
    yscale: str = "linear",
    grid: bool = True,
) -> plt.Axes:
    """
    Overlay multiple y-series versus the same x-axis.

    For each series, points whose x or y value is not finite are dropped and
    the remaining points are sorted by x before plotting. All columns are
    resolved before any figure is created, so a missing column does not leave
    an empty figure behind.

    Parameters
    ----------
    binned : pandas.DataFrame
        DataFrame of binned scalar values, with frozen schema.
    x_col : str or None
        Column to use for the x-axis. If None or `use_index_as_x=True`,
        use the DataFrame index.
    y_cols : sequence of str
        Column names to plot as y-axis values. A single string is treated
        as one column name.
    use_index_as_x : bool, default=False
        If True, use the DataFrame index for x-values regardless of `x_col`.
    labels : sequence of str, optional
        Labels for the plotted series. Defaults to `y_cols`. Must provide at
        least one label per entry in `y_cols`; extra labels are ignored.
    ax : matplotlib.axes.Axes, optional
        Axes to plot into. If None, create a new figure and axes.
    markers : sequence of str, optional
        Markers for the series, cycled if there are more series than
        markers. Defaults to a set of common markers (also used when empty).
    linestyles : sequence of str, optional
        Linestyles for the series, cycled if there are more series than
        linestyles. Defaults to all solid lines (also used when empty).
    xscale : {"linear", "log"}, default="linear"
        X-axis scale type.
    yscale : {"linear", "log"}, default="linear"
        Y-axis scale type.
    grid : bool, default=True
        Whether to draw a grid.

    Returns
    -------
    ax : matplotlib.axes.Axes
        The matplotlib Axes containing the plot.

    Raises
    ------
    KeyError
        If requested columns are not found in the schema.
    ValueError
        If fewer `labels` than `y_cols` are supplied.
    """
    # A bare string is itself a sequence of characters; treat it as a single
    # column name instead of iterating over its letters.
    y_cols = [y_cols] if isinstance(y_cols, str) else list(y_cols)

    if labels is None:
        labels = y_cols
    else:
        labels = [labels] if isinstance(labels, str) else list(labels)
        if len(labels) < len(y_cols):
            # zip() would otherwise silently drop the unlabeled series.
            raise ValueError(
                f"Got {len(labels)} labels for {len(y_cols)} y-columns; "
                "provide one label per series."
            )

    # Empty style sequences fall back to the defaults so that the cyclic
    # lookups below never divide by zero.
    if markers is None or len(markers) == 0:
        markers = ["o", "s", "D", "^", "v", "P", "X"]
    if linestyles is None or len(linestyles) == 0:
        linestyles = ["-"]

    # common x
    if use_index_as_x or x_col is None:
        x = _index_to_numeric(binned.index)
        xerr = None
        xlabel = binned.index.name or "bin"
    else:
        x, xerr = _get_center_and_err(binned, x_col)
        xlabel = x_col

    # Resolve every y column up front so schema errors surface before any
    # figure is created or partially drawn.
    series = [_get_center_and_err(binned, yc) for yc in y_cols]

    if ax is None:
        _, ax = plt.subplots()

    x_finite = np.isfinite(x)
    for i, ((y, yerr), lab) in enumerate(zip(series, labels)):
        # Keep finite points only and order them by x, as plot_binned does.
        keep = np.flatnonzero(x_finite & np.isfinite(y))
        keep = keep[np.argsort(x[keep])]
        ax.errorbar(
            x[keep],
            y[keep],
            xerr=None if xerr is None else xerr[:, keep],
            yerr=None if yerr is None else yerr[:, keep],
            fmt=markers[i % len(markers)],
            linestyle=linestyles[i % len(linestyles)],
            label=lab,
        )

    ax.set_xlabel(xlabel)
    # A single series is labeled with its column name; several get "value".
    ax.set_ylabel(y_cols[0] if len(y_cols) == 1 else "value")
    ax.set_xscale(xscale)
    ax.set_yscale(yscale)
    if grid:
        ax.grid(True, alpha=0.3)
    ax.legend()
    return ax

0 commit comments

Comments
 (0)