@@ -124,12 +124,14 @@ def _get_lambdas(fep_files: str | list[str]) -> None | list[float]:
124124
125125
126126@_init_attrs
127- def extract_u_nk (fep_files : str | list [str ], T : float ) -> pd .DataFrame :
127+ def extract_u_nk (
128+ fep_files : str | list [str ], T : float , ignore_equilibration : bool = False
129+ ) -> pd .DataFrame :
128130 """Return reduced potentials `u_nk` from NAMD fepout file(s).
129131
130132 Parameters
131133 ----------
132- fep_file : str or list of str
134+ fep_files : str or list of str
133135 Path to fepout file(s) to extract data from. These are sorted by filename,
134136 not including the path, prior to processing, using natural-sort. This way,
135137 filenames including numbers without leading zeros are handled intuitively.
@@ -144,13 +146,21 @@ def extract_u_nk(fep_files: str | list[str], T: float) -> pd.DataFrame:
144146 T : float
145147 Temperature in Kelvin at which the simulation was sampled.
146148
149+ ignore_equilibration : bool, optional
150+ If True, the parser will begin collecting energy data immediately from
151+ the start of a window, ignoring the "#STARTING COLLECTION..." line.
152+ This effectively includes the equilibration steps (defined by NAMD's
153+ alchEquilSteps) in the resulting DataFrame. Default is False.
154+
147155 Returns
148156 -------
149157 u_nk : :class:`pandas.DataFrame`
150158 Potential energy for each alchemical state (k) for each frame (n).
151159
152- Note
153- ----
160+ Notes
161+ -----
162+ Only samples collected after alchEquilSteps steps in each window are read.
163+ If post-hoc equilibrium detection is used, alchEquilSteps can be set to 0 in NAMD.
154164 If the number of forward and backward samples in a given window are different,
155165 the extra sample(s) will be discarded. This is typically zero or one sample.
156166
@@ -164,6 +174,10 @@ def extract_u_nk(fep_files: str | list[str], T: float) -> pd.DataFrame:
164174 robustness checks.
165175
166176 :param:`fep_files` can now be a list of filenames.
177+
178+ .. versionchanged:: 2.6.0
179+ Added parameter ignore_equilibration.
180+
167181 """
168182 beta = 1 / (k_b * T )
169183
@@ -176,8 +190,8 @@ def extract_u_nk(fep_files: str | list[str], T: float) -> pd.DataFrame:
176190 # create dataframe for results
177191 u_nk = pd .DataFrame (columns = ["time" , "fep-lambda" ])
178192
179- # boolean flag to parse data after equil time
180- parsing = False
193+ # Flag to detect inconsistencies like truncated windows
194+ in_window = False
181195
182196 if type (fep_files ) is str :
183197 fep_files = [fep_files ]
@@ -209,14 +223,15 @@ def extract_u_nk(fep_files: str | list[str], T: float) -> pd.DataFrame:
209223 # within the same file, then complain. This can happen if truncated fepout files
210224 # are presented in the wrong order.
211225 if line_split [0 ] == "#NEW" :
212- if parsing :
226+ if in_window :
213227 logger .error (
214228 f"Window with lambda1: { lambda1_at_start } lambda2: { lambda2_at_start } lambda_idws: { lambda_idws_at_start } appears truncated"
215229 )
216230 logger .error (
217231 f"because a new window was encountered in { fep_file } before the previous one finished."
218232 )
219233 raise ValueError ("New window begun after truncated window" )
234+ in_window = True
220235
221236 lambda1_at_start , lambda2_at_start = (
222237 float (line_split [6 ]),
@@ -229,6 +244,9 @@ def extract_u_nk(fep_files: str | list[str], T: float) -> pd.DataFrame:
229244
230245 # this line marks end of window; dump data into dataframe
231246 if line_split [0 ] == "#Free" :
247+ # Note: in_window might already be false if this window was restarted without another "#NEW" line
248+ in_window = False
249+
232250 # extract lambda values for finished window
233251 # lambda1 = sampling lambda (row), lambda2 = comparison lambda (col)
234252 lambda1 = float (line_split [7 ])
@@ -320,28 +338,30 @@ def extract_u_nk(fep_files: str | list[str], T: float) -> pd.DataFrame:
320338 win_ts = []
321339 win_de_back = []
322340 win_ts_back = []
323- parsing = False
324341 has_idws = False
325342 lambda1_at_start , lambda2_at_start , lambda_idws_at_start = (
326343 None ,
327344 None ,
328345 None ,
329346 )
347+ # end of "#Free" line processing
330348
331349 # append work value from 'dE' column of fepout file
332- if parsing :
333- if line_split [0 ] == "FepEnergy:" :
334- win_de .append (float (line_split [6 ]))
335- win_ts .append (float (line_split [1 ]))
336- elif line_split [0 ] == "FepE_back:" :
337- win_de_back .append (float (line_split [6 ]))
338- win_ts_back .append (float (line_split [1 ]))
339-
340- # Turn parsing on after line 'STARTING COLLECTION OF ENSEMBLE AVERAGE'
341- if "#STARTING" in line_split :
342- parsing = True
343-
344- if len (win_de ) != 0 or len (win_de_back ) != 0 : # pragma: no cover
350+ if line_split [0 ] == "FepEnergy:" :
351+ win_de .append (float (line_split [6 ]))
352+ win_ts .append (float (line_split [1 ]))
353+ elif line_split [0 ] == "FepE_back:" :
354+ win_de_back .append (float (line_split [6 ]))
355+ win_ts_back .append (float (line_split [1 ]))
356+
357+ # Forget previous data for this point after line 'STARTING COLLECTION OF ENSEMBLE AVERAGE'
358+ if "#STARTING" in line_split and not ignore_equilibration :
359+ win_de = []
360+ win_ts = []
361+ win_de_back = []
362+ win_ts_back = []
363+
364+ if in_window or len (win_de ) != 0 or len (win_de_back ) != 0 : # pragma: no cover
345365 logger .warning (
346366 'Trailing data without footer line ("#Free energy..."). Interrupted run?'
347367 )
@@ -358,7 +378,9 @@ def extract_u_nk(fep_files: str | list[str], T: float) -> pd.DataFrame:
358378 return u_nk
359379
360380
361- def extract (fep_files : str | list [str ], T : float ) -> dict [str , pd .DataFrame | None ]:
381+ def extract (
382+ fep_files : str | list [str ], T : float , ignore_equilibration : bool = False
383+ ) -> dict [str , pd .DataFrame | None ]:
362384 r"""Return reduced potentials `u_nk` and gradients `dH/dl`
363385 from NAMD fepout file(s).
364386
@@ -395,5 +417,5 @@ def extract(fep_files: str | list[str], T: float) -> dict[str, pd.DataFrame | No
395417 """
396418
397419 return {
398- "u_nk" : extract_u_nk (fep_files , T )
420+ "u_nk" : extract_u_nk (fep_files , T , ignore_equilibration )
399421 } # NOTE: maybe we should also have 'dHdl': None
0 commit comments