Skip to content

Commit 623dfad

Browse files
committed
add support for alternate swapping method
1 parent 17f53bf commit 623dfad

File tree

3 files changed

+477
-54
lines changed

3 files changed

+477
-54
lines changed

ensemble_md/replica_exchange_EE.py

Lines changed: 195 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,8 @@ def set_params(self, analysis):
160160
optional_args = {
161161
"add_swappables": None,
162162
"modify_coords": None,
163+
"resname_select": None,
164+
"resname_transform": None,
163165
"resname_list": None,
164166
"swap_rep_pattern": None,
165167
"nst_sim": None,
@@ -456,6 +458,16 @@ def set_params(self, analysis):
456458
if self.resname_list is None and (not os.path.exists('residue_connect.csv') or not os.path.exists('residue_swap_map.csv')): # noqa: E501
457459
raise ParameterError('resname_list option must be filled in if using default swapping function and not swap guide') # noqa: E501
458460
self.modify_coords_fn = self.default_coords_fn
461+
elif self.modify_coords == 'number':
462+
if self.resname_select is None or self.resname_transform is None:
463+
raise ParameterError('resname_select and resname_transform options must be filled in if using the change number function')
464+
# Read file
465+
input_file = gmx_parser.read_top(self.top[-1], self.resname_transform)
466+
# Determine the atom names corresponding to the atom numbers
467+
start_line, atom_name, atom_num, state = coordinate_swap.get_names(input_file, self.resname_transform)
468+
# Save atom names for residue of interest and set the coordinate swap function
469+
self.atom_names = atom_name
470+
self.modify_coords_fn = self.change_num
459471
else:
460472
module_file = os.path.basename(self.modify_coords)
461473
module_dir = os.path.dirname(self.modify_coords)
@@ -1651,8 +1663,8 @@ def default_coords_fn(self, molA_file_name, molB_file_name, swap_index):
16511663
if len(molA.topology.select('water')) != 0:
16521664
A_dimensions = coordinate_swap.get_dimensions(molA_file)
16531665
B_dimensions = coordinate_swap.get_dimensions(molB_file)
1654-
molA = coordinate_swap.fix_break(molA, nameA, A_dimensions, connection_map[connection_map['Resname'] == nameA]) # noqa: E501
1655-
molB = coordinate_swap.fix_break(molB, nameB, B_dimensions, connection_map[connection_map['Resname'] == nameB]) # noqa: E501
1666+
molA = coordinate_swap.fix_break(molA, nameA, A_dimensions, connection_map[connection_map['Resname'] == nameA], self.verbose) # noqa: E501
1667+
molB = coordinate_swap.fix_break(molB, nameB, B_dimensions, connection_map[connection_map['Resname'] == nameB], self.verbose) # noqa: E501
16561668

16571669
# Step 4: Determine coordinates of atoms which need to be reconstructed as we swap coordinates between molecules # noqa: E501
16581670
df_atom_swap = coordinate_swap.get_miss_coord(molB, molA, nameB, nameA, df_atom_swap, 'A2B', swap_map[(swap_map['Swap A'] == nameB) & (swap_map['Swap B'] == nameA)]) # noqa: E501
@@ -1670,11 +1682,11 @@ def default_coords_fn(self, molA_file_name, molB_file_name, swap_index):
16701682
line_restart, atom_num_restart = coordinate_swap.write_unmodified(line_start, molA_file, molB_new, nameA, 1, line_start, copy.deepcopy(molA.xyz[0])) # noqa: E501
16711683

16721684
# Print new coordinates to file for molB
1673-
line_restart, atom_num_restart = coordinate_swap.write_modified(df_atom_swap, 'A2B', line_start, molA_file, molB_new, atom_num_restart, nameA, nameB, copy.deepcopy(molA.xyz[0]), atom_mapping, atom_order_B, atom_order_A) # noqa: E501
1685+
line_restart, atom_num_restart = coordinate_swap.write_modified(df_atom_swap, 'A2B', line_restart, molA_file, molB_new, atom_num_restart, nameA, nameB, copy.deepcopy(molA.xyz[0]), atom_mapping, atom_order_B, atom_order_A) # noqa: E501
16741686

16751687
if line_restart is not None:
16761688
# Print rest of file after modified residue
1677-
coordinate_swap.write_unmodified(line_restart, molA_file, molB_new, nameA, atom_num_restart, line_start, copy.deepcopy(molA.xyz[0])) # noqa: E501
1689+
line_start, atom_num_restart = coordinate_swap.write_unmodified(line_restart, molA_file, molB_new, nameA, atom_num_restart, line_start, copy.deepcopy(molA.xyz[0])) # noqa: E501
16781690

16791691
# Print box size
16801692
molB_new.write(molA_file[-1])
@@ -1688,11 +1700,11 @@ def default_coords_fn(self, molA_file_name, molB_file_name, swap_index):
16881700
line_restart, atom_num_restart = coordinate_swap.write_unmodified(line_start, molB_file, molA_new, nameB, 1, line_start, copy.deepcopy(molB.xyz[0])) # noqa: E501
16891701

16901702
# Print new coordinates to file for molA
1691-
line_restart, atom_num_restart = coordinate_swap.write_modified(df_atom_swap, 'B2A', line_start, molB_file, molA_new, atom_num_restart, nameB, nameA, copy.deepcopy(molB.xyz[0]), atom_mapping, atom_order_A, atom_order_B) # noqa: E501
1703+
line_restart, atom_num_restart = coordinate_swap.write_modified(df_atom_swap, 'B2A', line_restart, molB_file, molA_new, atom_num_restart, nameB, nameA, copy.deepcopy(molB.xyz[0]), atom_mapping, atom_order_A, atom_order_B) # noqa: E501
16921704

16931705
if line_restart is not None:
16941706
# Print rest of file after modified residue
1695-
coordinate_swap.write_unmodified(line_restart, molB_file, molA_new, nameB, atom_num_restart, line_start, copy.deepcopy(molB.xyz[0])) # noqa: E501
1707+
line_start, atom_num_restart = coordinate_swap.write_unmodified(line_restart, molB_file, molA_new, nameB, atom_num_restart, line_start, copy.deepcopy(molB.xyz[0])) # noqa: E501
16961708

16971709
# Print box size
16981710
molA_new.write(molB_file[-1])
@@ -1707,13 +1719,13 @@ def process_top(self):
17071719
Processes the input topologies in order to determine the atoms for alignment in the default GRO swapping
17081720
function. Output as csv files to prevent needing to re-run this step.
17091721
"""
1710-
if not os.path.exists('atom_name_mapping.csv'):
1722+
if not os.path.exists('atom_name_mapping.csv') and self.resname_list is not None:
17111723
coordinate_swap.create_atom_map(self.gro, self.resname_list, self.swap_rep_pattern)
17121724
atom_name_mapping = pd.read_csv('atom_name_mapping.csv')
1713-
else:
1725+
elif self.resname_list is not None:
17141726
atom_name_mapping = pd.read_csv('atom_name_mapping.csv')
17151727

1716-
if not os.path.exists('residue_connect.csv'):
1728+
if not os.path.exists('residue_connect.csv') and self.resname_list is not None:
17171729
df_top = pd.DataFrame()
17181730
for f, file_name in enumerate(self.top):
17191731
# Read file
@@ -1743,10 +1755,38 @@ def process_top(self):
17431755
df = pd.DataFrame({'Resname': self.resname_list[f], 'Connect 1': connect_1, 'Connect 2': connect_2, 'State 1': state_1, 'State 2': state_2}) # noqa: E501
17441756
df_top = pd.concat([df_top, df])
17451757
df_top.to_csv('residue_connect.csv')
1758+
elif not os.path.exists('residue_connect.csv'):
1759+
df_top = pd.DataFrame()
1760+
for resname in [self.resname_select, self.resname_transform]:
1761+
# Read file
1762+
input_file = gmx_parser.read_top(self.top[0], resname)
1763+
1764+
# Determine the atom names corresponding to the atom numbers
1765+
start_line, atom_name, atom_num, state = coordinate_swap.get_names(input_file, resname)
1766+
1767+
# Determine the connectivity of all atoms
1768+
connect_1, connect_2 = [], [] # Atom 1 and atom 2 which are connected and which state they are dummy atoms # noqa: E501
1769+
for l, line in enumerate(input_file[start_line:]): # noqa: E741
1770+
line_sep = line.split(' ')
1771+
if line_sep[0] == ';':
1772+
continue
1773+
if line_sep[0] == '\n':
1774+
break
1775+
while '' in line_sep:
1776+
line_sep.remove('')
1777+
if int(line_sep[0]) in atom_num and int(line_sep[1]) in atom_num:
1778+
num_1 = np.where(atom_num == int(line_sep[0]))[0][0]
1779+
num_2 = np.where(atom_num == int(line_sep[1]))[0][0]
1780+
connect_1.append(atom_name[num_1])
1781+
connect_2.append(atom_name[num_2])
1782+
1783+
df = pd.DataFrame({'Resname': resname, 'Connect 1': connect_1, 'Connect 2': connect_2}) # noqa: E501
1784+
df_top = pd.concat([df_top, df])
1785+
df_top.to_csv('residue_connect.csv')
17461786
else:
17471787
df_top = pd.read_csv('residue_connect.csv')
17481788

1749-
if not os.path.exists('residue_swap_map.csv'):
1789+
if not os.path.exists('residue_swap_map.csv') and self.resname_list is not None:
17501790
df_map = pd.DataFrame()
17511791
for swap in self.swap_rep_pattern:
17521792
# Determine atoms not present in both molecules
@@ -1773,3 +1813,148 @@ def process_top(self):
17731813
df_map = pd.concat([df_map, df])
17741814

17751815
df_map.to_csv('residue_swap_map.csv')
1816+
1817+
def change_num(self, molA_file_name, molB_file_name, swap_index):
1818+
"""
1819+
Swaps coordinates between two GRO files which differ in the number of the molecule of interest
1820+
1821+
Parameters
1822+
----------
1823+
molA_file_name : str
1824+
GRO file name for the moleucle to be swapped.
1825+
molB_file_name : str
1826+
GRO file name for the other moleucle to be swapped.
1827+
"""
1828+
# Determine name for transformed residue
1829+
molA_dir = molA_file_name.rsplit('/', 1)[0] + '/'
1830+
molB_dir = molB_file_name.rsplit('/', 1)[0] + '/'
1831+
1832+
# Load trajectory trr for higher precison coordinates
1833+
molA = md.load_trr(f'{molA_dir}/traj.trr', top=molA_file_name).slice(swap_index[0]) # Load specified frame of trr trajectory # noqa: E501
1834+
molB = md.load_trr(f'{molB_dir}/traj.trr', top=molB_file_name).slice(swap_index[1])
1835+
molA_coords = copy.deepcopy(molA.xyz[0])
1836+
molB_coords = copy.deepcopy(molB.xyz[0])
1837+
connection_map = pd.read_csv('residue_connect.csv')
1838+
1839+
# Step 1: Read the GRO input coordinate files and open temporary Output files
1840+
molA_file = open(molA_file_name, 'r').readlines() # open input file
1841+
molB_new_file_name = 'B_hybrid_swap.gro'
1842+
molB_new = open(molB_new_file_name, 'w')
1843+
molB_file = open(molB_file_name, 'r').readlines() # open input file
1844+
molA_new_file_name = 'A_hybrid_swap.gro'
1845+
molA_new = open(molA_new_file_name, 'w')
1846+
1847+
# Step 2: Determine how many of the molecule of interest are present in each system
1848+
numA, listA, trans_listA = coordinate_swap.deter_num_molecule(molA_file_name, self.resname_select, self.resname_transform)
1849+
numB, listB, trans_listB = coordinate_swap.deter_num_molecule(molB_file_name, self.resname_select, self.resname_transform)
1850+
1851+
# Step 3: Fix break if present for solvated systems only
1852+
if len(molA.topology.select('water')) != 0:
1853+
A_dimensions = coordinate_swap.get_dimensions(molA_file)
1854+
B_dimensions = coordinate_swap.get_dimensions(molB_file)
1855+
for resid in listA:
1856+
molA = coordinate_swap.fix_break(molA, self.resname_select, A_dimensions, connection_map[connection_map['Resname'] == self.resname_select], self.verbose, resid) # noqa: E501
1857+
for resid in trans_listA:
1858+
molA = coordinate_swap.fix_break(molA, self.resname_transform, A_dimensions, connection_map[connection_map['Resname'] == self.resname_transform], self.verbose, resid) # noqa: E501
1859+
for resid in listB:
1860+
molB = coordinate_swap.fix_break(molB, self.resname_select, B_dimensions, connection_map[connection_map['Resname'] == self.resname_select], self.verbose, resid) # noqa: E501
1861+
for resid in trans_listB:
1862+
molB = coordinate_swap.fix_break(molB, self.resname_transform, B_dimensions, connection_map[connection_map['Resname'] == self.resname_transform], self.verbose, resid) # noqa: E501
1863+
1864+
# Step 3: Setup the coordinate DF for the molecules
1865+
name, resid, resname, swap = [],[],[],[]
1866+
for i in listA:
1867+
for n in self.atom_names:
1868+
name.append(n)
1869+
resid.append(i)
1870+
resname.append(self.resname_select)
1871+
swap.append('B2A')
1872+
for i in trans_listA:
1873+
for n in self.atom_names:
1874+
name.append(n)
1875+
resid.append(i)
1876+
resname.append(self.resname_transform)
1877+
swap.append('B2A')
1878+
for i in listB:
1879+
for n in self.atom_names:
1880+
name.append(n)
1881+
resid.append(i)
1882+
resname.append(self.resname_select)
1883+
swap.append('A2B')
1884+
for i in trans_listB:
1885+
for n in self.atom_names:
1886+
name.append(n)
1887+
resid.append(i)
1888+
resname.append(self.resname_transform)
1889+
swap.append('A2B')
1890+
df_atom_swap = pd.DataFrame({'Name': name, 'Resid': resid, 'Resname': resname,'Swap': swap})
1891+
1892+
# Step 4: Determine coordinates of all residues of interest # noqa: E501
1893+
df_atom_swap = coordinate_swap.get_miss_coord_by_num(molB, molA, self.resname_select, self.resname_transform, 'A2B', np.array(listB+trans_listB), listA, trans_listA, self.atom_names, df_atom_swap) # noqa: E501
1894+
df_atom_swap = coordinate_swap.get_miss_coord_by_num(molA, molB, self.resname_select, self.resname_transform, 'B2A', np.array(listA+trans_listA), listB, trans_listB, self.atom_names, df_atom_swap) # noqa: E501
1895+
df_atom_swap.to_csv('df_atom_swap.csv')
1896+
1897+
# Step 5: Parse Current file to ensure atoms are added in the correct order
1898+
resname_opt = [self.resname_select, self.resname_transform]
1899+
if len(listA) == 0:
1900+
id_A = [[], trans_listA[0]]
1901+
else:
1902+
id_A = [listA[0], trans_listA[0]]
1903+
if len(listB) == 0:
1904+
id_B = [[], trans_listB[0]]
1905+
else:
1906+
id_B = [listB[0], trans_listB[0]]
1907+
atom_order_A = gmx_parser.deter_atom_order(molA_file, resname_opt, id_A)
1908+
atom_order_B = gmx_parser.deter_atom_order(molB_file, resname_opt, id_B)
1909+
1910+
# Step 6: Write the new file
1911+
# Reprint preamble text
1912+
preamble_legth = coordinate_swap.print_preamble(molA_file, molB_new, numB*len(self.atom_names), numA*len(self.atom_names)) # noqa: E501
1913+
line_start = preamble_legth
1914+
1915+
atom_num_restart = 1
1916+
resnum_restart = None
1917+
while line_start < len(molA_file):
1918+
if line_start is not None:
1919+
# Print up until we reach the residue to modify
1920+
line_start, atom_num_restart = coordinate_swap.write_unmodified(line_start, molA_file, molB_new,resname_opt, atom_num_restart, preamble_legth, molA_coords, resnum_restart) # noqa: E501
1921+
else:
1922+
break
1923+
1924+
if line_start is not np.nan:
1925+
# Print new coordinates to file for molB
1926+
line_start, resnum_restart, atom_num_restart = coordinate_swap.write_modified_res(df_atom_swap, 'A2B', line_start, molA_file, molB_new, atom_num_restart, atom_order_B, resname_opt) # noqa: E501
1927+
else:
1928+
break
1929+
1930+
# Print box size
1931+
molB_new.write(molA_file[-1])
1932+
molB_new.close()
1933+
1934+
# Step 7: Write the other new file
1935+
# Reprint preamble text
1936+
preamble_legth = coordinate_swap.print_preamble(molB_file, molA_new, numA*len(self.atom_names), numB*len(self.atom_names)) # noqa: E501
1937+
line_start = preamble_legth
1938+
1939+
atom_num_restart = 1
1940+
resnum_restart = None
1941+
while line_start < len(molB_file):
1942+
if line_start is not None:
1943+
# Print up until we reach the residue to modify
1944+
line_start, atom_num_restart = coordinate_swap.write_unmodified(line_start, molB_file, molA_new, resname_opt, atom_num_restart, preamble_legth, molB_coords, resnum_restart) # noqa: E501
1945+
else:
1946+
break
1947+
1948+
if line_start is not np.nan:
1949+
# Print new coordinates to file for molB
1950+
line_start, resnum_restart, atom_num_restart = coordinate_swap.write_modified_res(df_atom_swap, 'B2A', line_start, molB_file, molA_new, atom_num_restart, atom_order_A, resname_opt) # noqa: E501
1951+
else:
1952+
break
1953+
1954+
# Print box size
1955+
molA_new.write(molB_file[-1])
1956+
molA_new.close()
1957+
1958+
# Rename temp files
1959+
os.rename('A_hybrid_swap.gro', molB_dir + '/confout.gro')
1960+
os.rename('B_hybrid_swap.gro', molA_dir + '/confout.gro')

0 commit comments

Comments
 (0)