Skip to content

Commit 7232d7d

Browse files
Ericwang6njzjz
andauthored
Amber/sqm and Gromacs/gro Enhancement (#198)
* add BondOrderSystem * add BondOrderSystem * add BondOrderSystem * add BondOrderSystem * add BondOrderSystem * Update BondOrderSystem * solve oveall dependency on rdkit * Add dpdata/rdkit package * Add information on functions * Spelling correction * Add bond order assignment * Add test cases for bond order assignment * Add support for AmberTools sqm/out * Add bond order assignment * Add support for AmberTools sqm/out * Creat __init__.py * Add rdkit & openbabel environment * Update README.md * Update test.yml * Remove non user-friendly warnings * Update dpdata/rdkit/sanitize.py Co-authored-by: Jinzhe Zeng <[email protected]> * Use obabel python interface * Update README.md * Remove dependency on data for MolFormat & SdfFormat * Support dump multiple conformers for sdf file * Fix bug of SetFormalCharge type error RDKit cannot automatically transfer np.int32 to int * Add ut for to_sdf_file * Bug fix of dump to deepmd/raw file * More flexibility to control writing to gro file Support control resname / add atom index shift when dumping to .gro file * Support parse sqm.out to LabeledSystem & prepare sqm.in file Co-authored-by: Jinzhe Zeng <[email protected]>
1 parent f594787 commit 7232d7d

File tree

9 files changed

+548
-33
lines changed

9 files changed

+548
-33
lines changed

dpdata/amber/sqm.py

Lines changed: 67 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
import numpy as np
2+
from ..periodic_table import ELEMENTS
23

34

45
START = 0
5-
READ_CHARGE = 1
6-
READ_CHARGE_SUCCESS = 2
6+
READ_ENERGY = 1
7+
READ_CHARGE = 2
78
READ_COORDS_START = 3
89
READ_COORDS = 6
10+
READ_FORCES = 7
911

1012
def parse_sqm_out(fname):
1113
'''
@@ -14,41 +16,87 @@ def parse_sqm_out(fname):
1416
atom_symbols = []
1517
coords = []
1618
charges = []
19+
forces = []
20+
energies = []
21+
1722
with open(fname) as f:
18-
flag = 0
23+
flag = START
1924
for line in f:
20-
if line.startswith(" Atom Element Mulliken Charge"):
25+
if line.startswith(" Total SCF energy"):
26+
flag = READ_ENERGY
27+
elif line.startswith(" Atom Element Mulliken Charge"):
2128
flag = READ_CHARGE
2229
elif line.startswith(" Total Mulliken Charge"):
23-
flag = READ_CHARGE_SUCCESS
30+
flag = START
2431
elif line.startswith(" Final Structure"):
2532
flag = READ_COORDS_START
33+
elif line.startswith("QMMM: Forces on QM atoms"):
34+
flag = READ_FORCES
35+
elif flag == READ_ENERGY:
36+
energy = float(line.strip().split()[-2])
37+
energies.append(energy)
38+
flag = START
2639
elif flag == READ_CHARGE:
2740
ls = line.strip().split()
2841
atom_symbols.append(ls[-2])
2942
charges.append(float(ls[-1]))
3043
elif READ_COORDS_START <= flag < READ_COORDS:
3144
flag += 1
3245
elif flag == READ_COORDS:
33-
ls = line.strip()
34-
if not ls:
35-
break
36-
else:
37-
symbol = line.strip().split()[-4]
38-
coord = list(map(float, line.strip().split()[-3:]))
39-
coords.append(coord)
40-
return atom_symbols, charges, np.array(coords)
41-
42-
43-
def to_system_data(fname):
46+
coords.append([float(x) for x in line.strip().split()[-3:]])
47+
if len(coords) == len(charges):
48+
flag = START
49+
elif flag == READ_FORCES:
50+
forces.append([float(x) for x in line.strip().split()[-3:]])
51+
if len(forces) == len(charges):
52+
flag = START
53+
4454
data = {}
45-
atom_symbols, charges, coords = parse_sqm_out(fname)
4655
atom_names, data['atom_types'], atom_numbs = np.unique(atom_symbols, return_inverse=True, return_counts=True)
56+
data['charges'] = np.array(charges)
4757
data['atom_names'] = list(atom_names)
4858
data['atom_numbs'] = list(atom_numbs)
49-
data['charges'] = np.array([charges])
50-
data['coords'] = np.array([coords])
5159
data['orig'] = np.array([0, 0, 0])
5260
data['cells'] = np.array([[[100., 0., 0.], [0., 100., 0.], [0., 0., 100.]]])
5361
data['nopbc'] = True
62+
data['coords'] = np.array([coords])
63+
64+
energies = np.array(energies)
65+
forces = np.array([forces], dtype=np.float32)
66+
if len(forces) > 0:
67+
data['energies'] = energies
68+
data['forces'] = forces
69+
5470
return data
71+
72+
def make_sqm_in(data, fname=None, frame_idx=0, **kwargs):
73+
symbols = [data['atom_names'][ii] for ii in data['atom_types']]
74+
atomic_numbers = [ELEMENTS.index(ss) + 1 for ss in symbols]
75+
charge = kwargs.get("charge", 0)
76+
77+
# multiplicity
78+
mult = kwargs.get("mult", 1)
79+
if mult != 1 :
80+
raise RuntimeError("Multiplicity is not 1, which is not supported by sqm")
81+
82+
maxcyc = kwargs.get("maxcyc", 0) # 0 represents a single-point calculation
83+
theory = kwargs.get("qm_theory", "DFTB3")
84+
ret = "Run semi-emperical minimization\n"
85+
ret += " &qmmm\n"
86+
ret += f" qm_theory='{theory}'\n"
87+
ret += f" qmcharge={charge}\n"
88+
ret += f" maxcyc={maxcyc}\n"
89+
ret += " verbosity=4\n"
90+
ret += " /\n"
91+
for ii in range(len(data['atom_types'])):
92+
ret += "{:>4s}{:>6s}{:>14s}{:>14s}{:>14s}\n".format(
93+
str(atomic_numbers[ii]),
94+
str(symbols[ii]),
95+
f"{data['coords'][frame_idx][ii, 0]:.4f}",
96+
f"{data['coords'][frame_idx][ii, 1]:.4f}",
97+
f"{data['coords'][frame_idx][ii, 2]:.4f}"
98+
)
99+
if fname is not None:
100+
with open(fname, 'w') as fp:
101+
fp.write(ret)
102+
return ret

dpdata/gromacs/gro.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def _get_cell(line):
4141
cell = cell * nm2ang
4242
return cell
4343

44-
def file_to_system_data(fname, format_atom_name=True):
44+
def file_to_system_data(fname, format_atom_name=True, **kwargs):
4545
system = {'coords': [], 'cells': []}
4646
with open(fname) as fp:
4747
frame = 0
@@ -74,7 +74,9 @@ def file_to_system_data(fname, format_atom_name=True):
7474
system['cells'] = np.array(system['cells'])
7575
return system
7676

77-
def from_system_data(system, f_idx=0):
77+
def from_system_data(system, f_idx=0, **kwargs):
78+
resname = kwargs.get("resname", "MOL")
79+
shift = kwargs.get("shift", 0)
7880
ret = ""
7981
ret += " molecule" + "\n"
8082
n_atoms = sum(system["atom_numbs"])
@@ -83,8 +85,8 @@ def from_system_data(system, f_idx=0):
8385
atom_type = system["atom_types"][i]
8486
atom_name = system["atom_names"][atom_type]
8587
coords = system["coords"][f_idx] * ang2nm
86-
ret += "{:>5d}{:<5s}{:>5s}{:5d}{:8.3f}{:8.3f}{:8.3f}\n".format(1, "MOL", atom_name, i+1, *tuple(coords[i]))
88+
ret += "{:>5d}{:<5s}{:>5s}{:5d}{:8.3f}{:8.3f}{:8.3f}\n".format(1, resname, atom_name, i+shift+1, *tuple(coords[i]))
8789
cell = (system["cells"][f_idx].flatten() * ang2nm)[cell_idx_gmx2dp]
88-
ret += " " + " ".join([str(x) for x in cell])
90+
ret += " " + " ".join([f"{x:.3f}" for x in cell])
8991

9092
return ret

dpdata/plugins/amber.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,4 +34,20 @@ def from_system(self, fname, **kwargs):
3434
'''
3535
Read from ambertools sqm.out
3636
'''
37-
return dpdata.amber.sqm.to_system_data(fname)
37+
return dpdata.amber.sqm.parse_sqm_out(fname)
38+
39+
def from_labeled_system(self, fname, **kwargs):
40+
'''
41+
Read from ambertools sqm.out
42+
'''
43+
data = dpdata.amber.sqm.parse_sqm_out(fname)
44+
assert "forces" in list(data.keys()), f"No forces in {fname}"
45+
return data
46+
47+
@Format.register("sqm/in")
48+
class SQMINFormat(Format):
49+
def to_system(self, data, fname=None, frame_idx=0, **kwargs):
50+
"""
51+
Generate input files for semi-emperical calculation in sqm software
52+
"""
53+
return dpdata.amber.sqm.make_sqm_in(data, fname, frame_idx, **kwargs)

dpdata/plugins/gromacs.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ def from_system(self, file_name, format_atom_name=True, **kwargs):
1414
file_name : str
1515
The input file name
1616
"""
17-
return dpdata.gromacs.gro.file_to_system_data(file_name, format_atom_name=format_atom_name)
17+
return dpdata.gromacs.gro.file_to_system_data(file_name, format_atom_name=format_atom_name, **kwargs)
1818

1919
def to_system(self, data, file_name=None, frame_idx=-1, **kwargs):
2020
"""
@@ -31,12 +31,13 @@ def to_system(self, data, file_name=None, frame_idx=-1, **kwargs):
3131
if frame_idx == -1:
3232
strs = []
3333
for idx in range(data['coords'].shape[0]):
34-
gro_str = dpdata.gromacs.gro.from_system_data(data, f_idx=idx)
34+
gro_str = dpdata.gromacs.gro.from_system_data(data, f_idx=idx,
35+
**kwargs)
3536
strs.append(gro_str)
3637
gro_str = "\n".join(strs)
3738
else:
3839
gro_str = dpdata.gromacs.gro.from_system_data(
39-
data, f_idx=frame_idx)
40+
data, f_idx=frame_idx, **kwargs)
4041

4142
if file_name is None:
4243
return gro_str

tests/amber/methane.mol

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
2+
RDKit 3D
3+
4+
5 4 0 0 0 0 0 0 0 0999 V2000
5+
-0.0221 0.0032 0.0165 C 0 0 0 0 0 0 0 0 0 0 0 0
6+
-0.6690 0.8894 -0.1009 H 0 0 0 0 0 0 0 0 0 0 0 0
7+
-0.3778 -0.8578 -0.5883 H 0 0 0 0 0 0 0 0 0 0 0 0
8+
0.0964 -0.3151 1.0638 H 0 0 0 0 0 0 0 0 0 0 0 0
9+
0.9725 0.2803 -0.3911 H 0 0 0 0 0 0 0 0 0 0 0 0
10+
1 2 1 0
11+
1 3 1 0
12+
1 4 1 0
13+
1 5 1 0
14+
M END

tests/amber/sqm.in

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
Run semi-emperical minimization
2+
&qmmm
3+
qm_theory='DFTB3'
4+
qmcharge=0
5+
maxcyc=0
6+
verbosity=4
7+
/
8+
6 C -0.0221 0.0032 0.0165
9+
1 H -0.6690 0.8894 -0.1009
10+
1 H -0.3778 -0.8578 -0.5883
11+
1 H 0.0964 -0.3151 1.0638
12+
1 H 0.9725 0.2803 -0.3911

0 commit comments

Comments
 (0)