Skip to content

Commit c8e0dc4

Browse files
authored
Add comparison mode for the echem block (#1353)
1 parent 4243930 commit c8e0dc4

File tree

8 files changed

+702
-180
lines changed

8 files changed

+702
-180
lines changed

pydatalab/src/pydatalab/apps/echem/blocks.py

Lines changed: 84 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -114,7 +114,6 @@ def _load(self, file_ids: list[ObjectId] | ObjectId, reload: bool = True):
114114
"dqdv": "dQ/dV (mA/V)",
115115
"dvdq": "dV/dQ (V/mA)",
116116
}
117-
118117
if isinstance(file_ids, ObjectId):
119118
file_ids = [file_ids]
120119

@@ -198,7 +197,6 @@ def _load(self, file_ids: list[ObjectId] | ObjectId, reload: bool = True):
198197

199198
def plot_cycle(self):
200199
"""Plots the electrochemical cycling data from the file ID provided in the request."""
201-
202200
# Legacy support for when file_id was used
203201
if self.data.get("file_id") is not None and not self.data.get("file_ids"):
204202
LOGGER.info("Legacy file upload detected, using file_id")
@@ -234,42 +232,96 @@ def plot_cycle(self):
234232
if not isinstance(cycle_list, list):
235233
cycle_list = None
236234

237-
raw_df, cycle_summary_df = self._load(file_ids=file_ids)
235+
raw_dfs = {}
236+
cycle_summary_dfs = {}
238237

239-
characteristic_mass_g = self._get_characteristic_mass_g()
238+
# Single/multi mode gets a single dataframe - returned as a dict for consistency
239+
if self.data.get("mode") == "multi" or self.data.get("mode") == "single":
240+
file_info = get_file_info_by_id(file_ids[0], update_if_live=True)
241+
filename = file_info["name"]
242+
raw_df, cycle_summary_df = self._load(file_ids=file_ids)
243+
244+
characteristic_mass_g = self._get_characteristic_mass_g()
245+
246+
if characteristic_mass_g:
247+
raw_df["capacity (mAh/g)"] = raw_df["capacity (mAh)"] / characteristic_mass_g
248+
raw_df["current (mA/g)"] = raw_df["current (mA)"] / characteristic_mass_g
249+
if cycle_summary_df is not None:
250+
cycle_summary_df["charge capacity (mAh/g)"] = (
251+
cycle_summary_df["charge capacity (mAh)"] / characteristic_mass_g
252+
)
253+
cycle_summary_df["discharge capacity (mAh/g)"] = (
254+
cycle_summary_df["discharge capacity (mAh)"] / characteristic_mass_g
255+
)
256+
257+
if self.data.get("mode") == "multi":
258+
p = Path(filename)
259+
filename = f"{p.stem}_merged{p.suffix}"
260+
raw_dfs[filename] = raw_df
261+
cycle_summary_dfs[filename] = cycle_summary_df
262+
elif self.data.get("mode") == "single":
263+
raw_dfs[filename] = raw_df
264+
cycle_summary_dfs[filename] = cycle_summary_df
240265

241-
if characteristic_mass_g:
242-
raw_df["capacity (mAh/g)"] = raw_df["capacity (mAh)"] / characteristic_mass_g
243-
raw_df["current (mA/g)"] = raw_df["current (mA)"] / characteristic_mass_g
244-
if cycle_summary_df is not None:
245-
cycle_summary_df["charge capacity (mAh/g)"] = (
246-
cycle_summary_df["charge capacity (mAh)"] / characteristic_mass_g
247-
)
248-
cycle_summary_df["discharge capacity (mAh/g)"] = (
249-
cycle_summary_df["discharge capacity (mAh)"] / characteristic_mass_g
250-
)
266+
else:
267+
raise ValueError(f"Invalid mode {self.data.get('mode')}")
251268

252-
df = filter_df_by_cycle_index(raw_df, cycle_list)
253-
if cycle_summary_df is not None:
254-
cycle_summary_df = filter_df_by_cycle_index(cycle_summary_df, cycle_list)
255-
256-
if mode in ("dQ/dV", "dV/dQ"):
257-
df = compute_gpcl_differential(
258-
df,
259-
mode=mode,
260-
polynomial_spline=int(self.data["p_spline"]),
261-
s_spline=10 ** (-float(self.data["s_spline"])),
262-
window_size_1=int(self.data["win_size_1"]),
263-
window_size_2=int(self.data["win_size_2"]),
264-
use_normalized_capacity=bool(characteristic_mass_g),
265-
)
269+
# Load comparison files if provided
270+
comparison_file_ids = self.data.get("comparison_file_ids", [])
271+
if comparison_file_ids and len(comparison_file_ids) > 0:
272+
# TODO (ben smith) Currently can't load in different masses for different files in comparison mode
273+
for file in comparison_file_ids:
274+
try:
275+
file_info = get_file_info_by_id(file, update_if_live=True)
276+
filename = file_info["name"]
277+
comparison_raw_df, comparison_cycle_summary_df = self._load(
278+
file_ids=[file], reload=False
279+
)
280+
# Mark comparison files with a prefix to distinguish them
281+
raw_dfs[f"[Comparison] {filename}"] = comparison_raw_df
282+
cycle_summary_dfs[f"[Comparison] {filename}"] = comparison_cycle_summary_df
283+
except Exception as exc:
284+
LOGGER.error("Failed to load comparison file %s: %s", file, exc)
266285

267-
# Reduce df size to 100 points per cycle by default if there are more than a 100k points
268-
if len(df) > 1e5:
269-
df = reduce_echem_cycle_sampling(df, num_samples=100)
286+
dfs = {}
287+
for filename, raw_df in raw_dfs.items():
288+
cycle_summary_df = cycle_summary_dfs.get(filename)
289+
df = filter_df_by_cycle_index(raw_df, cycle_list)
290+
if cycle_summary_df is not None:
291+
cycle_summary_df = filter_df_by_cycle_index(cycle_summary_df, cycle_list)
292+
293+
if mode in ("dQ/dV", "dV/dQ"):
294+
df = compute_gpcl_differential(
295+
df,
296+
mode=mode,
297+
polynomial_spline=int(self.data["p_spline"]),
298+
s_spline=10 ** (-float(self.data["s_spline"])),
299+
window_size_1=int(self.data["win_size_1"]),
300+
window_size_2=int(self.data["win_size_2"]),
301+
use_normalized_capacity=bool(characteristic_mass_g),
302+
)
303+
# Reduce df size to 100 points per cycle by default if there are more than a 100k points
304+
if len(df) > 1e5:
305+
df = reduce_echem_cycle_sampling(df, num_samples=100)
306+
LOGGER.debug("Reduced df size, df length: %d", len(df))
307+
df["filename"] = filename
308+
cycle_summary_df["filename"] = filename
309+
dfs[filename] = df
310+
cycle_summary_dfs[filename] = cycle_summary_df
311+
312+
# Determine plotting mode - if comparison files exist, use comparison mode
313+
plotting_mode = (
314+
"comparison"
315+
if comparison_file_ids and len(comparison_file_ids) > 0
316+
else self.data.get("mode")
317+
)
270318

271319
layout = bokeh_plots.double_axes_echem_plot(
272-
df, cycle_summary=cycle_summary_df, mode=mode, normalized=bool(characteristic_mass_g)
320+
dfs=list(dfs.values()),
321+
cycle_summary_dfs=list(cycle_summary_dfs.values()),
322+
mode=mode,
323+
normalized=bool(characteristic_mass_g),
324+
plotting_mode=plotting_mode,
273325
)
274326

275327
if layout is not None:

pydatalab/src/pydatalab/apps/echem/utils.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -152,7 +152,7 @@ def filter_df_by_cycle_index(df: pd.DataFrame, cycle_list: list[int] | None = No
152152

153153
if len(cycle_list) == 1 and max(cycle_list) > df["cycle index"].max():
154154
cycle_list[0] = df["cycle index"].max()
155-
return df[df["cycle index"].isin(i for i in cycle_list)]
155+
return df[df["cycle index"].isin(i for i in cycle_list)].copy()
156156

157157
try:
158158
if len(cycle_list) == 1 and 2 * max(cycle_list) > df["half cycle"].max():
@@ -166,4 +166,4 @@ def filter_df_by_cycle_index(df: pd.DataFrame, cycle_list: list[int] | None = No
166166
raise ValueError(
167167
f"Unable to parse `cycle_list` as integers: {cycle_list}. Error: {exc}"
168168
) from exc
169-
return df[df["half cycle"].isin(half_cycles)]
169+
return df[df["half cycle"].isin(half_cycles)].copy()

0 commit comments

Comments
 (0)