weitse-hsu
diff --git a/‎ensemble_md/analysis/analyze_free_energy.py‎
Lines changed: 71 additions & 12 deletions b/‎ensemble_md/analysis/analyze_free_energy.py‎
Lines changed: 71 additions & 12 deletions
diff --git a/‎ensemble_md/analysis/analyze_traj.py‎
Lines changed: 62 additions & 13 deletions b/‎ensemble_md/analysis/analyze_traj.py‎
Lines changed: 62 additions & 13 deletions
@@ -189,6 +189,8 @@ def _combine_df_adjacent(df_adjacent, state_ranges, df_err_adjacent=None, err_ty
         A list of lists free energy differences between adjacent states for all replicas.
     state_ranges : list
         A list of lists of showing the state indices sampled by each replica.
+    n_tot : int
+        Number of lambda states
     df_err_adjacent : list, Optional
         A list of lists of uncertainties corresponding to the values of :code:`df_adjacent`. Notably, if
         :code:`df_err_adjacent` is :code:`None`, simple means will be used. Otherwise, inverse-variance weighted
@@ -247,7 +249,48 @@ def _combine_df_adjacent(df_adjacent, state_ranges, df_err_adjacent=None, err_ty
     return df, df_err, overlap_bool
 
 
-def calculate_free_energy(data, state_ranges, df_method="MBAR", err_method="propagate", n_bootstrap=None, seed=None):
+def _calculate_df(estimators):
+    """
+    An internal function used in :func:`calculate_free_energy` to calculate the free energy difference between the
+    first and last states, and its uncertainty, using only the first estimator in :code:`estimators`.
+
+    Parameters
+    ----------
+    estimators : list
+        A list of estimators fitting the input data for all replicas. With this, the user
+        can access all the free energies and their associated uncertainties for all states and replicas.
+        In our code, these estimators come from the function :func:`_apply_estimators`.
+
+    Returns
+    -------
+    df : float
+        Free energy difference between the first and last states, taken from the first estimator only.
+    df_err : float
+        Uncertainty corresponding to the value of :code:`df`.
+
+    See also
+    --------
+    :func:`calculate_free_energy`
+    """
+    # Compute FE estimate
+    df = estimators[0].delta_f_
+    l = np.linspace(0, 1, num=len(df.index))
+    df.index = l
+    df.columns = l
+    est = df.loc[0, 1]
+    print(df)
+
+    # Compute FE estimate error
+    df_err = estimators[0].d_delta_f_
+    l = np.linspace(0, 1, num=len(df_err.index))
+    df_err.index = l
+    df_err.columns = l
+    err = df_err.loc[0, 1]
+
+    return est, err
+
+
+def calculate_free_energy(data, state_ranges, df_method="MBAR", err_method="propagate", n_bootstrap=None, seed=None, MTREXEE=False):  # noqa: E501
     """
     Caculates the averaged free energy profile with the chosen method given :math:`u_{nk}` or :math:`dH/dλ` data
     obtained from all replicas of the REXEE simulation. Available methods include TI, BAR, and MBAR. TI
@@ -275,6 +318,8 @@ def calculate_free_energy(data, state_ranges, df_method="MBAR", err_method="prop
     seed : int, Optional
         The random seed for bootstrapping. Only relevant when :code:`err_method` is :code:`"bootstrap"`.
         The default is :code:`None`.
+    MTREXEE : bool, Optional
+        Whether the data come from an MT-REXEE simulation. The default is :code:`False`.
 
     Returns
     -------
@@ -299,10 +344,17 @@ def calculate_free_energy(data, state_ranges, df_method="MBAR", err_method="prop
         >>> f, _, _ = analyze_free_energy.calculate_free_energy(data_list, state_ranges, "MBAR", "propagate")
     """
     n_sim = len(data)
-    n_tot = state_ranges[-1][-1] + 1
+    if MTREXEE is False:
+        n_tot = state_ranges[-1][-1] + 1
+    else:
+        n_tot = state_ranges[-1] + 1
     estimators = _apply_estimators(data, df_method)
-    df_adjacent, df_err_adjacent = _calculate_df_adjacent(estimators)
-    df, df_err, overlap_bool = _combine_df_adjacent(df_adjacent, state_ranges, df_err_adjacent, err_type='propagate')
+    print(estimators)
+    if MTREXEE is False:
+        df_adjacent, df_err_adjacent = _calculate_df_adjacent(estimators)
+        df, df_err, overlap_bool = _combine_df_adjacent(df_adjacent, state_ranges, df_err_adjacent, err_type='propagate')  # noqa: E501
+    else:
+        df, df_err = _calculate_df(estimators)
 
     if err_method == 'bootstrap':
         if seed is not None:
@@ -314,26 +366,33 @@ def calculate_free_energy(data, state_ranges, df_method="MBAR", err_method="prop
         for b in range(n_bootstrap):
             sampled_data = [sampled_data_all[i].iloc[b * len(data[i]):(b + 1) * len(data[i])] for i in range(n_sim)]
             bootstrap_estimators = _apply_estimators(sampled_data, df_method)
-            df_adjacent, df_err_adjacent = _calculate_df_adjacent(bootstrap_estimators)
-            df_sampled, _, overlap_bool = _combine_df_adjacent(df_adjacent, state_ranges, df_err_adjacent, err_type='propagate')  # doesn't matter what value err_type here is # noqa: E501
+            if MTREXEE is False:
+                df_adjacent, df_err_adjacent = _calculate_df_adjacent(bootstrap_estimators)
+                df_sampled, _, overlap_bool = _combine_df_adjacent(df_adjacent, state_ranges, df_err_adjacent, err_type='propagate')  # doesn't matter what value err_type here is # noqa: E501
+            else:
+                df_sampled, _ = _calculate_df(bootstrap_estimators)
             df_bootstrap.append(df_sampled)
         error_bootstrap = np.std(df_bootstrap, axis=0, ddof=1)
 
         # Replace the value in df_err with value in error_bootstrap if df_err corresponds to
         # the df between overlapping states
         for i in range(n_tot - 1):
-            if overlap_bool[i] is True:
+            if MTREXEE is True or overlap_bool[i] is True:
                 print(f'Replaced the propagated error with the bootstrapped error for states {i} and {i + 1}: {df_err[i]:.5f} -> {error_bootstrap[i]:.5f}.')  # noqa: E501
                 df_err[i] = error_bootstrap[i]
     elif err_method == 'propagate':
         pass
     else:
         raise ParameterError('Specified err_method not available.')
-
-    df.insert(0, 0)
-    df_err.insert(0, 0)
-    f = [sum(df[:(i + 1)]) for i in range(len(df))]
-    f_err = [np.sqrt(sum([x**2 for x in df_err[:(i+1)]])) for i in range(len(df_err))]
+    
+    if MTREXEE is False:
+        df.insert(0, 0)
+        df_err.insert(0, 0)
+        f = [sum(df[:(i + 1)]) for i in range(len(df))]
+        f_err = [np.sqrt(sum([x**2 for x in df_err[:(i+1)]])) for i in range(len(df_err))]
+    else:
+        f = df
+        f_err = df_err
 
     return f, f_err, estimators
 
 
@@ -106,6 +106,7 @@ def stitch_time_series(files, rep_trajs, shifts=None, dhdl=True, col_idx=-1, sav
     # files_sorted[i] contains the dhdl/plumed output files for starting configuration i sorted
     # based on iteration indices
     files_sorted = [[] for i in range(n_configs)]
+    print(n_iter)
     for i in range(n_configs):
         for j in range(n_iter):
             files_sorted[i].append(files[rep_trajs[i][j]][j])
@@ -185,6 +186,8 @@ def stitch_time_series_for_sim(files, shifts=None, dhdl=True, col_idx=-1, save_n
     :func:`.stitch_time_series`
     :func:`.stitch_xtc_trajs`
     """
+    # TODO(review): unfinished, disabled check -- if os.path.exists('track_swap_frame.npy'):
+
     n_sim = len(files)      # number of replicas
     n_iter = len(files[0])  # number of iterations per replica
     trajs = [[] for i in range(n_sim)]
@@ -543,6 +546,7 @@ def plot_state_hist(trajs, state_ranges, fig_name, stack=True, figsize=None, pre
             dir_list = []
             for i in fig_name.split('/')[:-1]:
                 dir_list.append(i)
+                dir_list.append('/')
             dir_path = ''.join(dir_list)
             np.save(f'{dir_path}/hist_data.npy', hist_data)
         else:
@@ -833,7 +837,7 @@ def plot_transit_time(trajs, N, fig_prefix=None, dt=None, folder='.'):
                     plt.savefig(f'{folder}/hist_{fig_names[t]}', dpi=600)
                 else:
                     plt.savefig(f'{folder}/{fig_prefix}_hist_{fig_names[t]}', dpi=600)
-    #Save to csv
+    # Save to csv
     sim_list, rt_list = [], []
     for n in range(len(t_roundtrip_list)):
         for rt in t_roundtrip_list[n]:
@@ -1350,6 +1354,30 @@ def get_delta_w_updates(log_file, plot=False):
 
 
 def end_states_only_traj(working_dir, n_sim, n_iter, l0_states, l1_states, swap_rep_pattern, ps_per_frame):
+    """
+    Create a trajectory which is a concatenation of all frames for each unique end state.
+
+    Parameters
+    ----------
+    working_dir : str
+        path for the current working directory
+    n_sim : int
+        the number of simulations run
+    n_iter : int
+        the number of iterations run
+    l0_states : list of int
+        the lambda states which correspond to lambda=0
+    l1_states : list of int
+        the lambda states which correspond to lambda=1
+    swap_rep_pattern : list of int
+        the replica swapping pattern which will indicate which end states are common
+    ps_per_frame : float
+        the timestep to convert the time in the GROMACS dh/dl file to frames in the trajectory
+
+    Returns
+    -------
+    None
+    """
     import pandas as pd
     import os
     import mdtraj as md
@@ -1434,18 +1462,39 @@ def end_states_only_traj(working_dir, n_sim, n_iter, l0_states, l1_states, swap_
                         traj = md.join(traj, traj_add)
             traj.save_xtc(f'{working_dir}/analysis/{state}_{rep}.xtc')
 
-def concat_sim_traj(working_dir, n_sim, n_iter):
+
+def concat_sim_traj(working_dir, n_sim, n_iter, gro):
+    """
+    Create, for each replica, one trajectory concatenating all iterations, superposed on the structures in :code:`gro`.
+
+    Parameters
+    ----------
+    working_dir : str
+        path for the current working directory
+    n_sim : int
+        the number of simulations run
+    n_iter : int
+        the number of iterations run
+
+    Returns
+    -------
+    None
+    """
     import mdtraj as md
     import os
-
+    from tqdm import tqdm
+    
     for rep in range(n_sim):
-        if os.path.exists(f'{working_dir}/sim_{rep}/iteration_0/confout_backup.gro'):
-            name = 'confout_backup'
-        else:
-            name = 'confout'
-
-        traj = md.load(f'{working_dir}/sim_{rep}/iteration_0/traj.trr', top=f'{working_dir}/sim_{rep}/iteration_0/{name}.gro')
-        for iteration in range(1, n_iter):
-            traj_add = md.load(f'{working_dir}/sim_{rep}/iteration_{iteration}/traj.trr', top=f'{working_dir}/sim_{rep}/iteration_0/{name}.gro')
-            traj = md.join([traj, traj_add])
-        traj.save_xtc(f'{working_dir}/analysis/sim{rep}_concat.xtc')
+        if not os.path.exists(f'{working_dir}/analysis/sim{rep}_concat.xtc'):
+            if os.path.exists(f'{working_dir}/sim_{rep}/iteration_0/confout_backup.gro'):
+                name = 'confout_backup'
+            else:
+                name = 'confout'
+            gro_ref = md.load(f'{working_dir}/{gro[rep]}')
+            traj = md.load(f'{working_dir}/sim_{rep}/iteration_0/traj.trr', top=f'{working_dir}/sim_{rep}/iteration_0/{name}.gro')  # noqa: E501
+            traj.superpose(gro_ref, frame=0)
+            for iteration in tqdm(range(1, n_iter)):
+                traj_add = md.load(f'{working_dir}/sim_{rep}/iteration_{iteration}/traj.trr', top=f'{working_dir}/sim_{rep}/iteration_0/{name}.gro')  # noqa: E501
+                traj_add.superpose(gro_ref, frame=0)
+                traj = md.join([traj, traj_add[1:]])
+            traj.save_xtc(f'{working_dir}/analysis/sim{rep}_concat.xtc')