Skip to content

Commit bc0a67f

Browse files
authored
Add ignore_equilibration flag to NAMD parser (#451)
* Add ignore_equilibration flag to NAMD parser
  This is useful because equilibrium detection is often run after the fact anyway. Replace the parsing flag with in_window. The code can now parse files obtained with alchEquilSteps 0.
* Remove zealous check
* ruff format
* Add tests for new NAMD parser behavior
* Add flag to extract(), update CHANGES
* Ignore undefined attributes
1 parent 0b3d44f commit bc0a67f

File tree

6 files changed

+80
-29
lines changed

6 files changed

+80
-29
lines changed

CHANGES

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,11 @@ The rules for this file:
1212
* accompany each entry with github issue/PR number (Issue #xyz)
1313
* release numbers follow "Semantic Versioning" https://semver.org
1414

15+
01/20/2026 jhenin
16+
17+
- Add flag to ignore NAMD's equilibration in NAMD parser (PR #451)
18+
- Accept NAMD data with 0 equilibration steps (PR #451)
19+
1520
10/21/2025 xiki-tempula, jaclark5, yuxuanzhuang, orbeckst
1621

1722
* 2.5.0

src/alchemlyb/parsing/gmx.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,7 @@ def extract_dHdl(xvg: str, T: float, filter: bool = True) -> pd.DataFrame:
248248
newind = ["time"] + lambdas
249249
dHdl = dHdl.reset_index().set_index(newind)
250250

251-
dHdl.name = "dH/dl"
251+
dHdl.name = "dH/dl" # type: ignore[attr-defined]
252252

253253
return dHdl
254254

src/alchemlyb/parsing/gomc.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ def extract_dHdl(filename: str, T: float) -> pd.DataFrame:
152152
newind = ["time"] + cols
153153
dHdl = dHdl.reset_index().set_index(newind)
154154

155-
dHdl.name = "dH/dl"
155+
dHdl.name = "dH/dl" # type: ignore[attr-defined]
156156

157157
return dHdl
158158

src/alchemlyb/parsing/lammps.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -656,7 +656,7 @@ def extract_u_nk_from_u_n(
656656
)
657657

658658
u_nk.set_index(["time", "fep-lambda"], inplace=True)
659-
u_nk.name = "u_nk"
659+
u_nk.name = "u_nk" # type: ignore[attr-defined]
660660

661661
return u_nk
662662

@@ -1012,7 +1012,7 @@ def extract_u_nk(
10121012
u_nk.set_index(["time", "fep-lambda"], inplace=True)
10131013
else:
10141014
u_nk.set_index(["time", "coul-lambda", "vdw-lambda"], inplace=True)
1015-
u_nk.name = "u_nk"
1015+
u_nk.name = "u_nk" # type: ignore[attr-defined]
10161016

10171017
u_nk = u_nk.dropna()
10181018

@@ -1115,7 +1115,7 @@ def extract_dHdl_from_u_n(
11151115

11161116
dHdl.set_index(["time", "fep-lambda"], inplace=True)
11171117
dHdl["fep"] = dHdl["fep"] * beta
1118-
dHdl.name = "dH_dl"
1118+
dHdl.name = "dH_dl" # type: ignore[attr-defined]
11191119

11201120
return dHdl
11211121

@@ -1311,7 +1311,7 @@ def extract_dHdl(
13111311
else:
13121312
dHdl["coul"] = dHdl["coul"] * beta
13131313

1314-
dHdl.name = "dH_dl"
1314+
dHdl.name = "dH_dl" # type: ignore[attr-defined]
13151315

13161316
return dHdl
13171317

src/alchemlyb/parsing/namd.py

Lines changed: 45 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -124,12 +124,14 @@ def _get_lambdas(fep_files: str | list[str]) -> None | list[float]:
124124

125125

126126
@_init_attrs
127-
def extract_u_nk(fep_files: str | list[str], T: float) -> pd.DataFrame:
127+
def extract_u_nk(
128+
fep_files: str | list[str], T: float, ignore_equilibration: bool = False
129+
) -> pd.DataFrame:
128130
"""Return reduced potentials `u_nk` from NAMD fepout file(s).
129131
130132
Parameters
131133
----------
132-
fep_file : str or list of str
134+
fep_files : str or list of str
133135
Path to fepout file(s) to extract data from. These are sorted by filename,
134136
not including the path, prior to processing, using natural-sort. This way,
135137
filenames including numbers without leading zeros are handled intuitively.
@@ -144,13 +146,21 @@ def extract_u_nk(fep_files: str | list[str], T: float) -> pd.DataFrame:
144146
T : float
145147
Temperature in Kelvin at which the simulation was sampled.
146148
149+
ignore_equilibration : bool, optional
150+
If True, the parser will begin collecting energy data immediately from
151+
the start of a window, ignoring the "#STARTING COLLECTION..." line.
152+
This effectively includes the equilibration steps (defined by NAMD's
153+
alchEquilSteps) in the resulting DataFrame. Default is False.
154+
147155
Returns
148156
-------
149157
u_nk : :class:`pandas.DataFrame`
150158
Potential energy for each alchemical state (k) for each frame (n).
151159
152-
Note
153-
----
160+
Notes
161+
-----
162+
By default (ignore_equilibration=False), only samples collected after alchEquilSteps steps in each window are read.
163+
If post-hoc equilibrium detection is used, alchEquilSteps can be set to 0 in NAMD.
154164
If the number of forward and backward samples in a given window are different,
155165
the extra sample(s) will be discarded. This is typically zero or one sample.
156166
@@ -164,6 +174,10 @@ def extract_u_nk(fep_files: str | list[str], T: float) -> pd.DataFrame:
164174
robustness checks.
165175
166176
:param:`fep_files` can now be a list of filenames.
177+
178+
.. versionchanged:: 2.6.0
179+
Added parameter ignore_equilibration.
180+
167181
"""
168182
beta = 1 / (k_b * T)
169183

@@ -176,8 +190,8 @@ def extract_u_nk(fep_files: str | list[str], T: float) -> pd.DataFrame:
176190
# create dataframe for results
177191
u_nk = pd.DataFrame(columns=["time", "fep-lambda"])
178192

179-
# boolean flag to parse data after equil time
180-
parsing = False
193+
# Flag to detect inconsistencies like truncated windows
194+
in_window = False
181195

182196
if type(fep_files) is str:
183197
fep_files = [fep_files]
@@ -209,14 +223,15 @@ def extract_u_nk(fep_files: str | list[str], T: float) -> pd.DataFrame:
209223
# within the same file, then complain. This can happen if truncated fepout files
210224
# are presented in the wrong order.
211225
if line_split[0] == "#NEW":
212-
if parsing:
226+
if in_window:
213227
logger.error(
214228
f"Window with lambda1: {lambda1_at_start} lambda2: {lambda2_at_start} lambda_idws: {lambda_idws_at_start} appears truncated"
215229
)
216230
logger.error(
217231
f"because a new window was encountered in {fep_file} before the previous one finished."
218232
)
219233
raise ValueError("New window begun after truncated window")
234+
in_window = True
220235

221236
lambda1_at_start, lambda2_at_start = (
222237
float(line_split[6]),
@@ -229,6 +244,9 @@ def extract_u_nk(fep_files: str | list[str], T: float) -> pd.DataFrame:
229244

230245
# this line marks end of window; dump data into dataframe
231246
if line_split[0] == "#Free":
247+
# Note: in_window might already be false if this window was restarted without another "#NEW" line
248+
in_window = False
249+
232250
# extract lambda values for finished window
233251
# lambda1 = sampling lambda (row), lambda2 = comparison lambda (col)
234252
lambda1 = float(line_split[7])
@@ -320,28 +338,30 @@ def extract_u_nk(fep_files: str | list[str], T: float) -> pd.DataFrame:
320338
win_ts = []
321339
win_de_back = []
322340
win_ts_back = []
323-
parsing = False
324341
has_idws = False
325342
lambda1_at_start, lambda2_at_start, lambda_idws_at_start = (
326343
None,
327344
None,
328345
None,
329346
)
347+
# end of "#Free" line processing
330348

331349
# append work value from 'dE' column of fepout file
332-
if parsing:
333-
if line_split[0] == "FepEnergy:":
334-
win_de.append(float(line_split[6]))
335-
win_ts.append(float(line_split[1]))
336-
elif line_split[0] == "FepE_back:":
337-
win_de_back.append(float(line_split[6]))
338-
win_ts_back.append(float(line_split[1]))
339-
340-
# Turn parsing on after line 'STARTING COLLECTION OF ENSEMBLE AVERAGE'
341-
if "#STARTING" in line_split:
342-
parsing = True
343-
344-
if len(win_de) != 0 or len(win_de_back) != 0: # pragma: no cover
350+
if line_split[0] == "FepEnergy:":
351+
win_de.append(float(line_split[6]))
352+
win_ts.append(float(line_split[1]))
353+
elif line_split[0] == "FepE_back:":
354+
win_de_back.append(float(line_split[6]))
355+
win_ts_back.append(float(line_split[1]))
356+
357+
# Discard the samples accumulated so far in this window (the equilibration phase) once the line 'STARTING COLLECTION OF ENSEMBLE AVERAGE' is reached
358+
if "#STARTING" in line_split and not ignore_equilibration:
359+
win_de = []
360+
win_ts = []
361+
win_de_back = []
362+
win_ts_back = []
363+
364+
if in_window or len(win_de) != 0 or len(win_de_back) != 0: # pragma: no cover
345365
logger.warning(
346366
'Trailing data without footer line ("#Free energy..."). Interrupted run?'
347367
)
@@ -358,7 +378,9 @@ def extract_u_nk(fep_files: str | list[str], T: float) -> pd.DataFrame:
358378
return u_nk
359379

360380

361-
def extract(fep_files: str | list[str], T: float) -> dict[str, pd.DataFrame | None]:
381+
def extract(
382+
fep_files: str | list[str], T: float, ignore_equilibration: bool = False
383+
) -> dict[str, pd.DataFrame | None]:
362384
r"""Return reduced potentials `u_nk` and gradients `dH/dl`
363385
from NAMD fepout file(s).
364386
@@ -395,5 +417,5 @@ def extract(fep_files: str | list[str], T: float) -> dict[str, pd.DataFrame | No
395417
"""
396418

397419
return {
398-
"u_nk": extract_u_nk(fep_files, T)
420+
"u_nk": extract_u_nk(fep_files, T, ignore_equilibration)
399421
} # NOTE: maybe we should also have 'dHdl': None

src/alchemlyb/tests/parsing/test_namd.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -441,3 +441,27 @@ def test_u_nk_restarted_last_window_truncated(restarted_dataset_last_window_trun
441441

442442
with pytest.raises(ValueError, match="Last window is truncated"):
443443
extract_u_nk(restarted_dataset_last_window_truncated["data"]["both"], T=300)
444+
445+
446+
def test_u_nk_ignore_equilibration(dataset):
447+
"""Test that ignore_equilibration=True returns all data"""
448+
449+
original_file = dataset["data"]["forward"][0]
450+
u_nk_original = extract_u_nk(original_file, T=300)
451+
452+
# New behavior: Should return the full dataset (including equilibration)
453+
u_nk_ignored = extract_u_nk(original_file, T=300, ignore_equilibration=True)
454+
assert len(u_nk_ignored) > len(u_nk_original)
455+
456+
457+
def test_u_nk_no_equilibration(dataset, tmp_path):
458+
"""Test that all data is read when the #STARTING line is missing from the fepout file"""
459+
460+
original_file = dataset["data"]["forward"][0]
461+
462+
no_starting_file = _corrupt_fepout(
463+
original_file, [("#STARTING", lambda tokens: None)], tmp_path
464+
)
465+
466+
u_nk_no_equilibration = extract_u_nk(no_starting_file, T=300)
467+
assert len(u_nk_no_equilibration) > 0

0 commit comments

Comments
 (0)