Skip to content

Commit 0ef2e4b

Browse files
authored
Merge pull request #145 from Ericwang6/master
Add support for reading and dumping multi-frames for gro file & add dump system to gaussian input file
2 parents aeaf1fd + 471de61 commit 0ef2e4b

File tree

6 files changed

+220
-31
lines changed

6 files changed

+220
-31
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ The `System` or `LabeledSystem` can be constructed from the following file forma
7878
| PWmat | movement | True | True | LabeledSystem | 'pwmat/movement' |
7979
| PWmat | OUT.MLMD | True | True | LabeledSystem | 'pwmat/out.mlmd' |
8080
| Amber | multi | True | True | LabeledSystem | 'amber/md' |
81-
| Gromacs | gro | False | False | System | 'gromacs/gro' |
81+
| Gromacs | gro | True | False | System | 'gromacs/gro' |
8282

8383

8484
The Class `dpdata.MultiSystems` can read data from a dir which may contains many files of different systems, or from single xyz file which contains different systems.

dpdata/gromacs/gro.py

Lines changed: 59 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,23 @@
11
#!/usr/bin/env python3
2-
2+
import re
33
import numpy as np
4+
import json
5+
import os
46

57
nm2ang = 10.
8+
ang2nm = 1. / nm2ang
9+
cell_idx_gmx2dp = [0, 4, 8, 1, 2, 3, 5, 6, 7]
10+
11+
def _format_atom_name(atom_name):
12+
patt = re.compile("[a-zA-Z]*")
13+
match = re.search(patt, atom_name)
14+
fmt_name = match.group().capitalize()
15+
return fmt_name
616

7-
def _get_line(line):
17+
def _get_line(line, fmt_atom_name=True):
818
atom_name = line[10:15].split()[0]
19+
if fmt_atom_name:
20+
atom_name = _format_atom_name(atom_name)
921
atom_idx = int(line[15:20].split()[0])
1022
posis = [float(line[ii:ii+8]) for ii in range(20,44,8)]
1123
posis = np.array(posis) * nm2ang
@@ -29,27 +41,50 @@ def _get_cell(line):
2941
cell = cell * nm2ang
3042
return cell
3143

32-
def file_to_system_data(fname):
33-
names = []
34-
idxs = []
35-
posis = []
44+
def file_to_system_data(fname, format_atom_name=True):
45+
system = {'coords': [], 'cells': []}
3646
with open(fname) as fp:
37-
fp.readline()
38-
natoms = int(fp.readline())
39-
for ii in range(natoms):
40-
n, i, p = _get_line(fp.readline())
41-
names.append(n)
42-
idxs.append(i)
43-
posis.append(p)
44-
cell = _get_cell(fp.readline())
45-
posis = np.array(posis)
46-
system = {}
47-
system['orig'] = np.array([0, 0, 0])
48-
system['atom_names'] = list(set(names))
49-
system['atom_names'].sort()
50-
system['atom_numbs'] = [names.count(ii) for ii in system['atom_names']]
51-
system['atom_types'] = [system['atom_names'].index(ii) for ii in names]
52-
system['atom_types'] = np.array(system['atom_types'], dtype = int)
53-
system['coords'] = np.array([posis])
54-
system['cells'] = np.array([cell])
47+
frame = 0
48+
while True:
49+
flag = fp.readline()
50+
if not flag:
51+
break
52+
else:
53+
frame += 1
54+
names = []
55+
idxs = []
56+
posis = []
57+
natoms = int(fp.readline())
58+
for ii in range(natoms):
59+
n, i, p = _get_line(fp.readline(), fmt_atom_name=format_atom_name)
60+
names.append(n)
61+
idxs.append(i)
62+
posis.append(p)
63+
cell = _get_cell(fp.readline())
64+
posis = np.array(posis)
65+
if frame == 1:
66+
system['orig'] = np.zeros(3)
67+
system['atom_names'] = list(set(names))
68+
system['atom_numbs'] = [names.count(ii) for ii in system['atom_names']]
69+
system['atom_types'] = [system['atom_names'].index(ii) for ii in names]
70+
system['atom_types'] = np.array(system['atom_types'], dtype = int)
71+
system['coords'].append(posis)
72+
system['cells'].append(cell)
73+
system['coords'] = np.array(system['coords'])
74+
system['cells'] = np.array(system['cells'])
5575
return system
76+
77+
def from_system_data(system, f_idx=0):
78+
ret = ""
79+
ret += " molecule" + "\n"
80+
n_atoms = sum(system["atom_numbs"])
81+
ret += " " + str(n_atoms) + "\n"
82+
for i in range(n_atoms):
83+
atom_type = system["atom_types"][i]
84+
atom_name = system["atom_names"][atom_type]
85+
coords = system["coords"][f_idx] * ang2nm
86+
ret += "{:>5d}{:<5s}{:>5s}{:5d}{:8.3f}{:8.3f}{:8.3f}\n".format(1, "MOL", atom_name, i+1, *tuple(coords[i]))
87+
cell = (system["cells"][f_idx].flatten() * ang2nm)[cell_idx_gmx2dp]
88+
ret += " " + " ".join([str(x) for x in cell])
89+
90+
return ret

dpdata/system.py

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -627,7 +627,7 @@ def from_deepmd_raw(self, folder, type_map = None) :
627627

628628
@register_from_funcs.register_funcs("gro")
629629
@register_from_funcs.register_funcs("gromacs/gro")
630-
def from_gromacs_gro(self, file_name) :
630+
def from_gromacs_gro(self, file_name, format_atom_name=True) :
631631
"""
632632
Load gromacs .gro file
633633
@@ -636,7 +636,35 @@ def from_gromacs_gro(self, file_name) :
636636
file_name : str
637637
The input file name
638638
"""
639-
self.data = dpdata.gromacs.gro.file_to_system_data(file_name)
639+
self.data = dpdata.gromacs.gro.file_to_system_data(file_name, format_atom_name=format_atom_name)
640+
641+
@register_to_funcs.register_funcs("gromacs/gro")
642+
def to_gromacs_gro(self, file_name=None, frame_idx=-1):
643+
"""
644+
Dump the system in gromacs .gro format
645+
646+
Parameters
647+
----------
648+
file_name : str or None
649+
The output file name. If None, return the file content as a string
650+
frame_idx : int
651+
The index of the frame to dump
652+
"""
653+
assert(frame_idx < len(self.data['coords']))
654+
if frame_idx == -1:
655+
strs = []
656+
for idx in range(self.get_nframes()):
657+
gro_str = dpdata.gromacs.gro.from_system_data(self.data, f_idx=idx)
658+
strs.append(gro_str)
659+
gro_str = "\n".join(strs)
660+
else:
661+
gro_str = dpdata.gromacs.gro.from_system_data(self.data, f_idx=frame_idx)
662+
663+
if file_name is None:
664+
return gro_str
665+
else:
666+
with open(file_name, 'w+') as fp:
667+
fp.write(gro_str)
640668

641669
@register_to_funcs.register_funcs("deepmd/npy")
642670
def to_deepmd_npy(self, folder, set_size = 5000, prec=np.float32) :
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
ligand in water t= 0.00000 step= 0
2+
32
3+
1MOL C1 1 2.763 2.029 0.966
4+
1MOL C2 2 2.844 2.141 0.941
5+
1MOL C3 3 2.788 2.268 0.958
6+
1MOL C4 4 2.653 2.285 0.991
7+
1MOL C5 5 2.576 2.171 1.011
8+
1MOL C6 6 2.630 2.042 1.007
9+
1MOL CL1 7 2.405 2.194 1.045
10+
1MOL C7 8 2.588 2.419 0.999
11+
1MOL O1 9 2.558 2.474 0.894
12+
1MOL N1 10 2.572 2.471 1.125
13+
1MOL C8 11 2.504 2.587 1.170
14+
1MOL C9 12 2.542 2.648 1.289
15+
1MOL C10 13 2.461 2.753 1.336
16+
1MOL N2 14 2.359 2.804 1.266
17+
1MOL C11 15 2.321 2.742 1.153
18+
1MOL C12 16 2.389 2.631 1.103
19+
1MOL N3 17 2.217 2.806 1.082
20+
1MOL C13 18 2.118 2.742 1.011
21+
1MOL O2 19 2.121 2.624 0.980
22+
1MOL C14 20 2.003 2.827 0.960
23+
1MOL CL2 21 2.875 2.415 0.920
24+
1MOL H1 22 2.026 2.932 0.936
25+
1MOL H2 23 2.810 1.932 0.951
26+
1MOL H3 24 2.945 2.132 0.902
27+
1MOL H4 25 2.577 1.954 1.041
28+
1MOL H5 26 2.608 2.408 1.196
29+
1MOL H6 27 2.624 2.605 1.346
30+
1MOL H7 28 2.483 2.804 1.430
31+
1MOL H8 29 2.357 2.582 1.012
32+
1MOL H9 30 2.203 2.904 1.105
33+
1MOL H10 31 1.920 2.816 1.031
34+
1MOL H11 32 1.967 2.787 0.864
35+
3.24290 3.24290 2.29308 0.00000 0.00000 0.00000 0.00000 1.62145 1.62145

tests/gromacs/multi_frames.gro

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
frame_1
2+
9
3+
1SOL O 1 0.135 0.183 0.341
4+
1SOL H 2 0.177 0.149 0.262
5+
1SOL H 3 0.046 0.149 0.339
6+
2SOL O 4 0.520 0.447 0.111
7+
2SOL H 5 0.567 0.481 0.035
8+
2SOL H 6 0.568 0.481 0.186
9+
3SOL O 7 0.651 0.539 0.335
10+
3SOL H 8 0.653 0.634 0.336
11+
3SOL H 9 0.743 0.512 0.336
12+
0.7822838765564372 0.7353572647182051 0.9036518515423753
13+
frame_2
14+
9
15+
1SOL O 1 0.135 0.183 0.341
16+
1SOL H 2 0.177 0.149 0.262
17+
1SOL H 3 0.046 0.149 0.339
18+
2SOL O 4 0.520 0.447 0.111
19+
2SOL H 5 0.567 0.481 0.035
20+
2SOL H 6 0.568 0.481 0.186
21+
3SOL O 7 0.651 0.539 0.335
22+
3SOL H 8 0.653 0.634 0.336
23+
3SOL H 9 0.743 0.512 0.336
24+
0.7822838765564372 0.7353572647182051 0.9036518515423753

tests/test_gromacs_gro.py

Lines changed: 71 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,9 @@
55

66
class TestGromacsGro(unittest.TestCase):
77
def test_read_file(self):
8-
system = dpdata.System('gromacs/1h.gro')
9-
self.assertEqual(system['atom_names'], ['H', 'O'])
8+
system = dpdata.System('gromacs/1h.gro', type_map=['H', 'O'])
9+
self.assertTrue('H' in system['atom_names'])
10+
self.assertTrue('O' in system['atom_names'])
1011
self.assertEqual(system['atom_numbs'], [6, 3])
1112
for cc,ii in enumerate([1, 0, 0, 1, 0, 0, 1, 0, 0]):
1213
self.assertEqual(system['atom_types'][cc], ii)
@@ -24,8 +25,9 @@ def test_read_file(self):
2425
self.assertAlmostEqual(system['coords'][0][8][2], 3.36)
2526

2627
def test_read_file_tri(self):
27-
system = dpdata.System('gromacs/1h.tri.gro')
28-
self.assertEqual(system['atom_names'], ['H', 'O'])
28+
system = dpdata.System('gromacs/1h.tri.gro', type_map=['H', 'O'])
29+
self.assertTrue('H' in system['atom_names'])
30+
self.assertTrue('O' in system['atom_names'])
2931
self.assertEqual(system['atom_numbs'], [6, 3])
3032
for cc,ii in enumerate([1, 0, 0, 1, 0, 0, 1, 0, 0]):
3133
self.assertEqual(system['atom_types'][cc], ii)
@@ -44,3 +46,68 @@ def test_read_file_tri(self):
4446
self.assertAlmostEqual(system['coords'][0][8][1], 5.12)
4547
self.assertAlmostEqual(system['coords'][0][8][2], 3.36)
4648
system.to('vasp/poscar', 'POSCAR')
49+
50+
class TestGromacsGroMultiFrames(unittest.TestCase):
51+
def test_read_file(self):
52+
system = dpdata.System('gromacs/multi_frames.gro', type_map=['H', 'O'])
53+
self.assertTrue('H' in system['atom_names'])
54+
self.assertTrue('O' in system['atom_names'])
55+
self.assertEqual(system['atom_numbs'], [6, 3])
56+
for cc,ii in enumerate([1, 0, 0, 1, 0, 0, 1, 0, 0]):
57+
self.assertEqual(system['atom_types'][cc], ii)
58+
self.assertEqual(len(system['cells']), 2)
59+
self.assertEqual(len(system['coords']), 2)
60+
for ii in range(3):
61+
for jj in range(3):
62+
if ii != jj:
63+
self.assertAlmostEqual(system['cells'][0][ii][jj], 0) # frame no.1
64+
self.assertAlmostEqual(system['cells'][1][ii][jj], 0) # frame no.2
65+
# frame no.1
66+
self.assertAlmostEqual(system['cells'][0][0][0], 7.822838765564372)
67+
self.assertAlmostEqual(system['cells'][0][1][1], 7.353572647182051)
68+
self.assertAlmostEqual(system['cells'][0][2][2], 9.036518515423753)
69+
self.assertAlmostEqual(system['coords'][0][8][0], 7.43)
70+
self.assertAlmostEqual(system['coords'][0][8][1], 5.12)
71+
self.assertAlmostEqual(system['coords'][0][8][2], 3.36)
72+
# frame no.2
73+
self.assertAlmostEqual(system['cells'][1][0][0], 7.822838765564372)
74+
self.assertAlmostEqual(system['cells'][1][1][1], 7.353572647182051)
75+
self.assertAlmostEqual(system['cells'][1][2][2], 9.036518515423753)
76+
self.assertAlmostEqual(system['coords'][1][8][0], 7.43)
77+
self.assertAlmostEqual(system['coords'][1][8][1], 5.12)
78+
self.assertAlmostEqual(system['coords'][1][8][2], 3.36)
79+
80+
81+
class TestFormatAtomName(unittest.TestCase):
82+
def test_format_atom_name(self):
83+
system = dpdata.System("gromacs/case_for_format_atom_name.gro", fmt='gromacs/gro', type_map=['H','C','N','O','Cl'])
84+
self.assertEqual(system.formula, "H11C14N3O2Cl2")
85+
86+
def test_no_format_atom_name(self):
87+
system = dpdata.System("gromacs/case_for_format_atom_name.gro", fmt='gromacs/gro', format_atom_name=False)
88+
atoms = ['CL1', 'H6', 'C4', 'C3', 'C6', 'C11', 'H10', 'C2', 'N3', 'C14',
89+
'H7', 'H8', 'C13', 'H2', 'H1', 'H4', 'O2', 'H9', 'O1', 'N2', 'C9',
90+
'H3', 'C5', 'H11', 'N1', 'C7', 'C10', 'CL2', 'H5', 'C1', 'C8','C12']
91+
for at in atoms:
92+
self.assertTrue(at in system['atom_names'])
93+
94+
95+
class TestDumpGromacsGro(unittest.TestCase):
96+
def setUp(self):
97+
self.system = dpdata.System('gromacs/multi_frames.gro', type_map=['H', 'O'])
98+
99+
def test_dump_single_frame(self):
100+
self.system.to_gromacs_gro('gromacs/tmp_1.gro', frame_idx=0)
101+
tmp = dpdata.System('gromacs/tmp_1.gro', type_map=['H', 'O'])
102+
self.assertEqual(tmp.get_nframes(), 1)
103+
104+
def test_dump_multi_frames(self):
105+
self.system.to_gromacs_gro('gromacs/tmp_2.gro')
106+
tmp = dpdata.System('gromacs/tmp_2.gro', type_map=['H', 'O'])
107+
self.assertEqual(tmp.get_nframes(), 2)
108+
109+
def tearDown(self):
110+
if os.path.exists('gromacs/tmp_1.gro'):
111+
os.remove('gromacs/tmp_1.gro')
112+
if os.path.exists('gromacs/tmp_2.gro'):
113+
os.remove('gromacs/tmp_2.gro')

0 commit comments

Comments
 (0)