Fix formatting for tests

ajfriedman22 · ajfriedman22 · commit 1660df1389d2 · 2025-10-19T07:18:49.000-06:00
diff --git a/ensemble_md/replica_exchange_EE.py b/ensemble_md/replica_exchange_EE.py
@@ -164,6 +164,7 @@ def set_params(self, analysis):
             "resname_transform": None,
             "resname_list": None,
             "swap_rep_pattern": None,
+            "allow_virtual_V": False, # Allow use of V to indicate virtual atoms in topology
             "nst_sim": None,
             "proposal": 'exhaustive',
             "w_combine": False,
@@ -255,7 +256,7 @@ def set_params(self, analysis):
         if self.working_dir == '':
             self.working_dir = '.'
 
-        params_bool = ['verbose', 'rm_cpt', 'msm', 'free_energy', 'subsampling_avg', 'w_combine']
+        params_bool = ['verbose', 'rm_cpt', 'msm', 'free_energy', 'subsampling_avg', 'w_combine', 'allow_virtual_V']
         for i in params_bool:
             if type(getattr(self, i)) != bool:
                 raise ParameterError(f"The parameter '{i}' should be a boolean variable.")
@@ -1720,7 +1721,7 @@ def process_top(self):
         function. Output as csv files to prevent needing to re-run this step.
         """
         if not os.path.exists('atom_name_mapping.csv') and self.resname_list is not None:
-            coordinate_swap.create_atom_map(self.gro, self.resname_list, self.swap_rep_pattern)
+            coordinate_swap.create_atom_map(self.gro, self.resname_list, self.swap_rep_pattern, self.allow_virtual_V)
             atom_name_mapping = pd.read_csv('atom_name_mapping.csv')
         elif self.resname_list is not None:
             atom_name_mapping = pd.read_csv('atom_name_mapping.csv')
diff --git a/ensemble_md/tests/data/coord_swap/atom_name_mapping.csv b/ensemble_md/tests/data/coord_swap/atom_name_mapping.csv
@@ -1,125 +1,74 @@
 ,resname A,resname B,atomid A,atom name A,atomid B,atom name B
 0,A2B,B2C,1,S1,1,S1
-1,A2B,B2C,1,S1,1,S1
-2,A2B,B2C,2,C2,2,C2
-3,A2B,B2C,2,C2,2,C2
-4,A2B,B2C,3,N3,3,N3
-5,A2B,B2C,3,N3,3,N3
-6,A2B,B2C,4,C4,4,C4
-7,A2B,B2C,4,C4,4,C4
-8,A2B,B2C,5,C5,5,C5
-9,A2B,B2C,5,C5,5,C5
-10,A2B,B2C,6,C6,6,C6
-11,A2B,B2C,6,C6,6,C6
-12,A2B,B2C,7,H1,8,H1
-13,A2B,B2C,7,H1,8,H1
-14,A2B,B2C,8,H2,9,H2
-15,A2B,B2C,8,H2,9,H2
-16,A2B,B2C,9,H3,10,H3
-17,A2B,B2C,9,H3,10,H3
-18,A2B,B2C,10,H4,11,H4
-19,A2B,B2C,10,H4,11,H4
-20,A2B,B2C,12,DC7,7,C7
-21,A2B,B2C,13,HV5,12,H5
-22,A2B,B2C,14,HV6,13,H6
-23,A2B,B2C,15,HV7,14,H7
+1,A2B,B2C,2,C2,2,C2
+2,A2B,B2C,3,N3,3,N3
+3,A2B,B2C,4,C4,4,C4
+4,A2B,B2C,5,C5,5,C5
+5,A2B,B2C,6,C6,6,C6
+6,A2B,B2C,7,H1,8,H1
+7,A2B,B2C,8,H2,9,H2
+8,A2B,B2C,9,H3,10,H3
+9,A2B,B2C,10,H4,11,H4
+10,A2B,B2C,12,DC7,7,C7
+11,A2B,B2C,13,HV5,12,H5
+12,A2B,B2C,14,HV6,13,H6
+13,A2B,B2C,15,HV7,14,H7
 0,B2C,C2D,1,S1,1,S1
-1,B2C,C2D,1,S1,1,S1
-2,B2C,C2D,2,C2,2,C2
-3,B2C,C2D,2,C2,2,C2
-4,B2C,C2D,3,N3,3,N3
-5,B2C,C2D,3,N3,3,N3
-6,B2C,C2D,4,C4,4,C4
-7,B2C,C2D,4,C4,4,C4
-8,B2C,C2D,5,C5,5,C5
-9,B2C,C2D,5,C5,5,C5
-10,B2C,C2D,6,C6,6,C6
-11,B2C,C2D,6,C6,6,C6
-12,B2C,C2D,7,C7,7,C7
-13,B2C,C2D,7,C7,7,C7
-14,B2C,C2D,8,H1,9,H1
-15,B2C,C2D,8,H1,9,H1
-16,B2C,C2D,9,H2,10,H2
-17,B2C,C2D,9,H2,10,H2
-18,B2C,C2D,10,H3,11,H3
-19,B2C,C2D,10,H3,11,H3
-20,B2C,C2D,11,H4,12,H4
-21,B2C,C2D,11,H4,12,H4
-22,B2C,C2D,12,H5,19,HV5
-23,B2C,C2D,13,H6,13,H6
-24,B2C,C2D,13,H6,13,H6
-25,B2C,C2D,14,H7,14,H7
-26,B2C,C2D,14,H7,14,H7
-27,B2C,C2D,15,DC8,8,C8
-28,B2C,C2D,16,HV8,15,H8
-29,B2C,C2D,17,HV9,16,H9
-30,B2C,C2D,18,HV10,17,H10
+1,B2C,C2D,2,C2,2,C2
+2,B2C,C2D,3,N3,3,N3
+3,B2C,C2D,4,C4,4,C4
+4,B2C,C2D,5,C5,5,C5
+5,B2C,C2D,6,C6,6,C6
+6,B2C,C2D,7,C7,7,C7
+7,B2C,C2D,8,H1,9,H1
+8,B2C,C2D,9,H2,10,H2
+9,B2C,C2D,10,H3,11,H3
+10,B2C,C2D,11,H4,12,H4
+11,B2C,C2D,12,H5,19,HV5
+12,B2C,C2D,13,H6,13,H6
+13,B2C,C2D,14,H7,14,H7
+14,B2C,C2D,15,DC8,8,C8
+15,B2C,C2D,16,HV8,15,H8
+16,B2C,C2D,17,HV9,16,H9
+17,B2C,C2D,18,HV10,17,H10
 0,C2D,D2E,1,S1,1,S1
-1,C2D,D2E,1,S1,1,S1
-2,C2D,D2E,2,C2,2,C2
-3,C2D,D2E,2,C2,2,C2
-4,C2D,D2E,3,N3,3,N3
-5,C2D,D2E,3,N3,3,N3
-6,C2D,D2E,4,C4,4,C4
-7,C2D,D2E,4,C4,4,C4
-8,C2D,D2E,5,C5,5,C5
-9,C2D,D2E,5,C5,5,C5
-10,C2D,D2E,6,C6,6,C6
-11,C2D,D2E,6,C6,6,C6
-12,C2D,D2E,7,C7,7,C7
-13,C2D,D2E,7,C7,7,C7
-14,C2D,D2E,8,C8,18,DC8
-15,C2D,D2E,9,H1,9,H1
-16,C2D,D2E,9,H1,9,H1
-17,C2D,D2E,10,H2,10,H2
-18,C2D,D2E,10,H2,10,H2
-19,C2D,D2E,11,H3,11,H3
-20,C2D,D2E,11,H3,11,H3
-21,C2D,D2E,13,H6,13,H6
-22,C2D,D2E,13,H6,13,H6
-23,C2D,D2E,14,H7,14,H7
-24,C2D,D2E,14,H7,14,H7
-25,C2D,D2E,15,H8,19,HV8
-26,C2D,D2E,16,H9,20,HV9
-27,C2D,D2E,17,H10,21,HV10
-28,C2D,D2E,18,DC9,8,C9
-29,C2D,D2E,19,HV5,12,H5
-30,C2D,D2E,20,HV11,15,H11
-31,C2D,D2E,21,HV12,16,H12
-32,C2D,D2E,22,HV13,17,H13
+1,C2D,D2E,2,C2,2,C2
+2,C2D,D2E,3,N3,3,N3
+3,C2D,D2E,4,C4,4,C4
+4,C2D,D2E,5,C5,5,C5
+5,C2D,D2E,6,C6,6,C6
+6,C2D,D2E,7,C7,7,C7
+7,C2D,D2E,8,C8,18,DC8
+8,C2D,D2E,9,H1,9,H1
+9,C2D,D2E,10,H2,10,H2
+10,C2D,D2E,11,H3,11,H3
+11,C2D,D2E,13,H6,13,H6
+12,C2D,D2E,14,H7,14,H7
+13,C2D,D2E,15,H8,19,HV8
+14,C2D,D2E,16,H9,20,HV9
+15,C2D,D2E,17,H10,21,HV10
+16,C2D,D2E,18,DC9,8,C9
+17,C2D,D2E,19,HV5,12,H5
+18,C2D,D2E,20,HV11,15,H11
+19,C2D,D2E,21,HV12,16,H12
+20,C2D,D2E,22,HV13,17,H13
 0,D2E,E2F,1,S1,1,S1
-1,D2E,E2F,1,S1,1,S1
-2,D2E,E2F,2,C2,2,C2
-3,D2E,E2F,2,C2,2,C2
-4,D2E,E2F,3,N3,3,N3
-5,D2E,E2F,3,N3,3,N3
-6,D2E,E2F,4,C4,4,C4
-7,D2E,E2F,4,C4,4,C4
-8,D2E,E2F,5,C5,5,C5
-9,D2E,E2F,5,C5,5,C5
-10,D2E,E2F,6,C6,6,C6
-11,D2E,E2F,6,C6,6,C6
-12,D2E,E2F,7,C7,7,C7
-13,D2E,E2F,7,C7,7,C7
-14,D2E,E2F,8,C9,9,C9
-15,D2E,E2F,8,C9,9,C9
-16,D2E,E2F,9,H1,10,H1
-17,D2E,E2F,9,H1,10,H1
-18,D2E,E2F,10,H2,11,H2
-19,D2E,E2F,10,H2,11,H2
-20,D2E,E2F,11,H3,12,H3
-21,D2E,E2F,11,H3,12,H3
-22,D2E,E2F,13,H6,13,H6
-23,D2E,E2F,13,H6,13,H6
-24,D2E,E2F,14,H7,14,H7
-25,D2E,E2F,14,H7,14,H7
-26,D2E,E2F,15,H11,18,H11
-27,D2E,E2F,15,H11,18,H11
-28,D2E,E2F,16,H12,19,H12
-29,D2E,E2F,16,H12,19,H12
-30,D2E,E2F,17,H13,20,H13
-31,D2E,E2F,17,H13,20,H13
-32,D2E,E2F,18,DC8,8,C8
-33,D2E,E2F,19,HV8,15,H8
-34,D2E,E2F,20,HV9,16,H9
-35,D2E,E2F,21,HV10,17,H10
+1,D2E,E2F,2,C2,2,C2
+2,D2E,E2F,3,N3,3,N3
+3,D2E,E2F,4,C4,4,C4
+4,D2E,E2F,5,C5,5,C5
+5,D2E,E2F,6,C6,6,C6
+6,D2E,E2F,7,C7,7,C7
+7,D2E,E2F,8,C9,9,C9
+8,D2E,E2F,9,H1,10,H1
+9,D2E,E2F,10,H2,11,H2
+10,D2E,E2F,11,H3,12,H3
+11,D2E,E2F,13,H6,13,H6
+12,D2E,E2F,14,H7,14,H7
+13,D2E,E2F,15,H11,18,H11
+14,D2E,E2F,16,H12,19,H12
+15,D2E,E2F,17,H13,20,H13
+16,D2E,E2F,18,DC8,8,C8
+17,D2E,E2F,19,HV8,15,H8
+18,D2E,E2F,20,HV9,16,H9
+19,D2E,E2F,21,HV10,17,H10
diff --git a/ensemble_md/tests/test_coordinate_swap.py b/ensemble_md/tests/test_coordinate_swap.py
@@ -58,13 +58,13 @@ def test_fix_break():
     broken_mol = md.load(f'{input_path}/coord_swap/broken_mol_1D.gro')
     df_connect = pd.read_csv(f'{input_path}/coord_swap/residue_connect.csv')
     df_connect_res = df_connect[df_connect['Resname'] == 'C2D']
-    test_fix = coordinate_swap.fix_break(broken_mol, 'C2D', [2.74964, 2.74964, 2.74964], df_connect_res)
+    test_fix = coordinate_swap.fix_break(broken_mol, 'C2D', [2.74964, 2.74964, 2.74964], df_connect_res, False, 1)
 
     broken_mol_3D = md.load(f'{input_path}/coord_swap/broken_mol_3D.gro')
-    test_fix_3D = coordinate_swap.fix_break(broken_mol_3D, 'C2D', [2.74964, 2.74964, 2.74964], df_connect_res)
+    test_fix_3D = coordinate_swap.fix_break(broken_mol_3D, 'C2D', [2.74964, 2.74964, 2.74964], df_connect_res, False, 1)
 
     already_fixed = md.load(f'{input_path}/coord_swap/fixed_mol.gro')
-    still_fixed = coordinate_swap.fix_break(already_fixed, 'C2D', [2.74964, 2.74964, 2.74964], df_connect_res)
+    still_fixed = coordinate_swap.fix_break(already_fixed, 'C2D', [2.74964, 2.74964, 2.74964], df_connect_res, False, 1)
 
     fixed_mol = md.load(f'{input_path}/coord_swap/fixed_mol.gro')
 
@@ -139,8 +139,8 @@ def test_get_miss_coord():
 
     A_dimensions = coordinate_swap.get_dimensions(open(molA_file, 'r').readlines())
     B_dimensions = coordinate_swap.get_dimensions(open(molB_file, 'r').readlines())
-    molA = coordinate_swap.fix_break(molA, nameA, A_dimensions, connection_map[connection_map['Resname'] == nameA])
-    molB = coordinate_swap.fix_break(molB, nameB, B_dimensions, connection_map[connection_map['Resname'] == nameB])
+    molA = coordinate_swap.fix_break(molA, nameA, A_dimensions, connection_map[connection_map['Resname'] == nameA], False)
+    molB = coordinate_swap.fix_break(molB, nameB, B_dimensions, connection_map[connection_map['Resname'] == nameB], False)
 
     df_no_coords = pd.read_csv(f'{input_path}/coord_swap/extract_missing.csv')
     df = pd.read_csv(f'{input_path}/coord_swap/df_atom_swap.csv')
@@ -273,7 +273,7 @@ def test_get_names():
     top_files = ['A-B.itp', 'B-C.itp', 'C-D.itp', 'D-E.itp', 'E-F.itp']
     resnames = ['A2B', 'B2C', 'C2D', 'D2E', 'E2F']
 
-    start_lines = [26, 29, 33, 32, 36]
+    start_lines = [27, 30, 34, 33, 37]
     names = [['S1', 'C2', 'N3', 'C4', 'C5', 'C6', 'H1', 'H2', 'H3', 'H4', 'H17', 'DC7', 'HV5', 'HV6', 'HV7'], ['S1', 'C2', 'N3', 'C4', 'C5', 'C6', 'C7', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'H7', 'DC8', 'HV8', 'HV9', 'HV10'], ['S1', 'C2', 'N3', 'C4', 'C5', 'C6', 'C7', 'C8', 'H1', 'H2', 'H3', 'H4', 'H6', 'H7', 'H8', 'H9', 'H10', 'DC9', 'HV5', 'HV11', 'HV12', 'HV13'], ['S1', 'C2', 'N3', 'C4', 'C5', 'C6', 'C7', 'C9', 'H1', 'H2', 'H3', 'H5', 'H6', 'H7', 'H11', 'H12', 'H13', 'DC8', 'HV8', 'HV9', 'HV10'], ['S1', 'C2', 'N3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'H1', 'H2', 'H3', 'H6', 'H7', 'H8', 'H9', 'H10', 'H11', 'H12', 'H13', 'DC10', 'HV4', 'HV14', 'HV15', 'HV16']]  # noqa: E501
 
     lambda_states = [[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 0, 0, 0, 0],
@@ -345,7 +345,7 @@ def test_create_atom_map():
     swap_pattern = [[[0, 1], [1, 0]], [[1, 1], [2, 0]], [[2, 1], [3, 0]], [[3, 1], [4, 0]]]
 
     atom_name_mapping_true = pd.read_csv(f'{input_path}/coord_swap/atom_name_mapping.csv')
-    coordinate_swap.create_atom_map(gro, names, swap_pattern)
+    coordinate_swap.create_atom_map(gro, names, swap_pattern, True)
     atom_name_mapping_test = pd.read_csv('atom_name_mapping.csv')
     assert (atom_name_mapping_true == atom_name_mapping_test).all
     os.remove('atom_name_mapping.csv')
diff --git a/ensemble_md/utils/coordinate_swap.py b/ensemble_md/utils/coordinate_swap.py
@@ -106,6 +106,10 @@ def fix_break(mol, resname, box_dimensions, atom_connect_all, verbose, resid=Non
     atom_connect_all : pandas.DataFrame
         A pandas DataFrame which contains the name of all atoms which are connected to one another
         in the residue of interest
+    verbose : boolean
+        Whether print statements should be made or not
+    resid : None or int
+        The residue ID of the molecule of interest, used when multiple residues share the same name
 
     Returns
     -------
@@ -989,14 +993,16 @@ def write_unmodified(line_start, orig_file, new_file, old_res_name, atom_num, pr
     return line_restart, atom_num_restart
 
 
-def _sep_num_element(atom_name):
+def _sep_num_element(atom_name, allow_virtual_V):
     """
     Seperate the atom name into the element and the atom number
 
     Parameters
     ----------
     atom_name : str
         Name of the atom to be seperated
+    allow_virtual_V : bool
+        Whether a V in the atom name is allowed to indicate a virtual atom
 
     Returns
     -------
@@ -1022,7 +1028,7 @@ def _sep_num_element(atom_name):
             extra = ''.join(list(atom_identifier)[1:])
         else:
             extra = ''
-    if 'V' in extra:
+    if allow_virtual_V and 'V' in extra:
         extra = extra.strip('V')
     return element, num, extra
 
@@ -1246,7 +1252,7 @@ def _read_gro(side, resname_list, gro_list):
     return name, num
 
 
-def create_atom_map(gro_list, resname_list, swap_patterns):
+def create_atom_map(gro_list, resname_list, swap_patterns, allow_virtual_V=False):
     """
     If you generate your hybrid topologies in a way that the
     same atom has the same name in each molecule then this
@@ -1260,6 +1266,8 @@ def create_atom_map(gro_list, resname_list, swap_patterns):
         list of residue names with the transformation
     swap_patterns : list of list of intergers
         swapping pattern between simulations
+    allow_virtual_V : bool
+        Whether a V in the atom name is allowed to indicate a virtual atom
 
     Returns
     -------
@@ -1272,7 +1280,7 @@ def create_atom_map(gro_list, resname_list, swap_patterns):
 
         atomnameA, atomidA, atomnameB, atomidB = [], [], [], []
         for n, name in enumerate(nameA):
-            element, num, extra = _sep_num_element(name)
+            element, num, extra = _sep_num_element(name, allow_virtual_V)
             if name in nameB:
                 atomnameA.append(name)
                 atomidA.append(numA[n])
@@ -1291,6 +1299,12 @@ def create_atom_map(gro_list, resname_list, swap_patterns):
                 nb = nameB.index(f'D{element}{num}')
                 atomnameB.append(f'D{element}{num}')
                 atomidB.append(numB[nb])
+            elif allow_virtual_V is True and f'{element}V{num}' in nameB:
+                atomnameA.append(name)
+                atomidA.append(numA[n])
+                nb = nameB.index(f'{element}V{num}')
+                atomnameB.append(f'{element}V{num}')
+                atomidB.append(numB[nb])
 
         df = pd.DataFrame({'resname A': resname_list[swap_pattern[0][0]], 'resname B': resname_list[swap_pattern[1][0]], 'atomid A': atomidA, 'atom name A': atomnameA, 'atomid B': atomidB, 'atom name B': atomnameB})  # noqa: E501
         output_df = pd.concat([output_df, df])