Skip to content

Commit 744bf3b

Browse files
authored
add gaussian input format and driver (#314)
* add gaussian input format and driver * add more tests * add tests; use formula as title
1 parent 4a7aad8 commit 744bf3b

File tree

5 files changed

+433
-0
lines changed

5 files changed

+433
-0
lines changed

dpdata/gaussian/gjf.py

Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
# The initial code of this file is based on
2+
# https://github.com/deepmodeling/dpgen/blob/0767dce7cad29367edb2e4a55fd0d8724dbda642/dpgen/generator/lib/gaussian.py#L1-L190
3+
# under LGPL 3.0 license
4+
"""Generate Gaussian input file."""
5+
6+
from typing import List, Tuple, Union
7+
import uuid
8+
import itertools
9+
import warnings
10+
import numpy as np
11+
from scipy.sparse import csr_matrix
12+
from scipy.sparse.csgraph import connected_components
13+
try:
14+
from openbabel import openbabel
15+
except ImportError:
16+
try:
17+
import openbabel
18+
except ImportError:
19+
openbabel = None
20+
from dpdata.periodic_table import Element
21+
22+
23+
24+
def _crd2frag(symbols: List[str], crds: np.ndarray) -> Tuple[int, List[int]]:
25+
"""Detect fragments from coordinates.
26+
27+
Parameters
28+
----------
29+
symbols : list[str]
30+
element symbols; virtual elements are not supported
31+
crds : np.ndarray
32+
atomic coordinates, shape: (N, 3)
33+
34+
Returns
35+
-------
36+
frag_numb : int
37+
number of fragments
38+
frag_index : list[int]
39+
frament index that each atom belongs to
40+
41+
Notes
42+
-----
43+
In this method, Open Babel is used to detect bond connectivity. The threshold
44+
is the sum of covalent radii with a slight tolerance (0.45 A). Note that
45+
this threshold has errors.
46+
47+
PBC support is removed from this method as Gaussian does not support PBC calculation.
48+
49+
Raises
50+
------
51+
ImportError
52+
if Open Babel is not installed
53+
"""
54+
if openbabel is None:
55+
raise ImportError("Open Babel (Python interface) should be installed to detect fragmentation!")
56+
atomnumber = len(symbols)
57+
# Use openbabel to connect atoms
58+
mol = openbabel.OBMol()
59+
mol.BeginModify()
60+
for idx, (symbol, position) in enumerate(zip(symbols, crds.astype(np.float64))):
61+
num = Element(symbol).Z
62+
atom = mol.NewAtom(idx)
63+
atom.SetAtomicNum(int(num))
64+
atom.SetVector(*position)
65+
mol.ConnectTheDots()
66+
mol.PerceiveBondOrders()
67+
mol.EndModify()
68+
bonds = []
69+
for ii in range(mol.NumBonds()):
70+
bond = mol.GetBond(ii)
71+
a = bond.GetBeginAtom().GetId()
72+
b = bond.GetEndAtom().GetId()
73+
bo = bond.GetBondOrder()
74+
bonds.extend([[a, b, bo], [b, a, bo]])
75+
bonds = np.array(bonds, ndmin=2).reshape((-1, 3))
76+
graph = csr_matrix(
77+
(bonds[:, 2], (bonds[:, 0], bonds[:, 1])), shape=(atomnumber, atomnumber))
78+
frag_numb, frag_index = connected_components(graph, 0)
79+
return frag_numb, frag_index
80+
81+
82+
def detect_multiplicity(symbols: np.ndarray) -> int:
83+
"""Find the minimal multiplicity of the given molecules.
84+
85+
Parameters
86+
----------
87+
symbols : np.ndarray
88+
element symbols; virtual elements are not supported
89+
90+
Returns
91+
-------
92+
int
93+
spin multiplicity
94+
"""
95+
# currently only support charge=0
96+
# oxygen -> 3
97+
if np.count_nonzero(symbols == ["O"]) == 2 and symbols.size == 2:
98+
return 3
99+
# calculates the total number of electrons, assumes they are paired as much as possible
100+
n_total = sum([Element(s).Z for s in symbols])
101+
return n_total % 2 + 1
102+
103+
104+
def make_gaussian_input(
105+
sys_data: dict,
106+
keywords: Union[str, List[str]],
107+
multiplicity: Union[str ,int] = "auto",
108+
charge: int = 0,
109+
fragment_guesses: bool = False,
110+
basis_set: str = None,
111+
keywords_high_multiplicity: str = None,
112+
nproc: int = 1,
113+
) -> str:
114+
"""Make gaussian input file.
115+
116+
Parameters
117+
----------
118+
sys_data : dict
119+
system data
120+
keywords : str or list[str]
121+
Gaussian keywords, e.g. force b3lyp/6-31g**. If a list,
122+
run multiple steps
123+
multiplicity : str or int, default=auto
124+
spin multiplicity state. It can be a number. If auto,
125+
multiplicity will be detected automatically, with the
126+
following rules:
127+
fragment_guesses=True
128+
multiplicity will +1 for each radical, and +2
129+
for each oxygen molecule
130+
fragment_guesses=False
131+
multiplicity will be 1 or 2, but +2 for each
132+
oxygen molecule
133+
charge : int, default=0
134+
molecule charge. Only used when charge is not provided
135+
by the system
136+
fragment_guesses : bool, default=False
137+
initial guess generated from fragment guesses. If True,
138+
multiplicity should be auto
139+
basis_set : str, default=None
140+
custom basis set
141+
keywords_high_multiplicity : str, default=None
142+
keywords for points with multiple raicals. multiplicity
143+
should be auto. If not set, fallback to normal keywords
144+
nproc : int, default=1
145+
Number of CPUs to use
146+
147+
Returns
148+
-------
149+
str
150+
gjf output string
151+
"""
152+
coordinates = sys_data['coords'][0]
153+
atom_names = sys_data['atom_names']
154+
atom_numbs = sys_data['atom_numbs']
155+
atom_types = sys_data['atom_types']
156+
# get atom symbols list
157+
symbols = [atom_names[atom_type] for atom_type in atom_types]
158+
159+
# assume default charge is zero and default spin multiplicity is 1
160+
if 'charge' in sys_data.keys():
161+
charge = sys_data['charge']
162+
163+
use_fragment_guesses = False
164+
if isinstance(multiplicity, int):
165+
mult_auto = False
166+
elif multiplicity == 'auto':
167+
mult_auto = True
168+
else:
169+
raise RuntimeError('The keyword "multiplicity" is illegal.')
170+
171+
if fragment_guesses:
172+
# Initial guess generated from fragment guesses
173+
# New feature of Gaussian 16
174+
use_fragment_guesses = True
175+
if not mult_auto:
176+
warnings.warn("Automatically set multiplicity to auto!")
177+
mult_auto = True
178+
179+
if mult_auto:
180+
frag_numb, frag_index = _crd2frag(symbols, coordinates)
181+
if frag_numb == 1:
182+
use_fragment_guesses = False
183+
mult_frags = []
184+
for i in range(frag_numb):
185+
idx = frag_index == i
186+
mult_frags.append(detect_multiplicity(np.array(symbols)[idx]))
187+
if use_fragment_guesses:
188+
multiplicity = sum(mult_frags) - frag_numb + 1 - charge % 2
189+
chargekeywords_frag = "%d %d" % (charge, multiplicity) + \
190+
''.join([' %d %d' % (charge, mult_frag)
191+
for mult_frag in mult_frags])
192+
else:
193+
multi_frags = np.array(mult_frags)
194+
multiplicity = 1 + \
195+
np.count_nonzero(multi_frags == 2) % 2 + \
196+
np.count_nonzero(multi_frags == 3) * 2 - charge % 2
197+
198+
if keywords_high_multiplicity is not None and np.count_nonzero(multi_frags == 2) >= 2:
199+
# at least 2 radicals
200+
keywords = keywords_high_multiplicity
201+
202+
if isinstance(keywords, str):
203+
keywords = [keywords]
204+
else:
205+
keywords = keywords.copy()
206+
207+
buff = []
208+
# keywords, e.g., force b3lyp/6-31g**
209+
if use_fragment_guesses:
210+
keywords[0] = '{} guess=fragment={}'.format(
211+
keywords[0], frag_numb)
212+
213+
chkkeywords = []
214+
if len(keywords)>1:
215+
chkkeywords.append('%chk={}.chk'.format(str(uuid.uuid1())))
216+
217+
nprockeywords = '%nproc={:d}'.format(nproc)
218+
# use formula as title
219+
titlekeywords = ''.join(["{}{}".format(symbol,numb) for symbol,numb in
220+
zip(atom_names, atom_numbs)])
221+
chargekeywords = '{} {}'.format(charge, multiplicity)
222+
223+
buff = [*chkkeywords, nprockeywords, '#{}'.format(
224+
keywords[0]), '', titlekeywords, '', (chargekeywords_frag if use_fragment_guesses else chargekeywords)]
225+
226+
for ii, (symbol, coordinate) in enumerate(zip(symbols, coordinates)):
227+
if use_fragment_guesses:
228+
buff.append("%s(Fragment=%d) %f %f %f" %
229+
(symbol, frag_index[ii] + 1, *coordinate))
230+
else:
231+
buff.append("%s %f %f %f" % (symbol, *coordinate))
232+
if basis_set is not None:
233+
# custom basis set
234+
buff.extend(['', basis_set, ''])
235+
for kw in itertools.islice(keywords, 1, None):
236+
buff.extend(['\n--link1--', *chkkeywords, nprockeywords,
237+
'#{}'.format(kw), '', titlekeywords, '', chargekeywords, ''])
238+
buff.append('\n')
239+
return '\n'.join(buff)

dpdata/plugins/dftbplus.py

Whitespace-only changes.

dpdata/plugins/gaussian.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
1+
import os
2+
import tempfile
3+
import subprocess as sp
4+
15
import dpdata.gaussian.log
6+
import dpdata.gaussian.gjf
27
from dpdata.format import Format
8+
from dpdata.driver import Driver
39

410

511
@Format.register("gaussian/log")
@@ -19,3 +25,81 @@ def from_labeled_system(self, file_name, md=False, **kwargs):
1925
class GaussianMDFormat(Format):
2026
def from_labeled_system(self, file_name, **kwargs):
2127
return GaussianLogFormat().from_labeled_system(file_name, md=True)
28+
29+
30+
@Format.register("gaussian/gjf")
31+
class GaussiaGJFFormat(Format):
32+
"""Gaussian input file"""
33+
def to_system(self, data: dict, file_name: str, **kwargs):
34+
"""Generate Gaussian input file.
35+
36+
Parameters
37+
----------
38+
data : dict
39+
system data
40+
file_name : str
41+
file name
42+
**kwargs : dict
43+
Other parameters to make input files. See :meth:`dpdata.gaussian.gjf.make_gaussian_input`
44+
"""
45+
text = dpdata.gaussian.gjf.make_gaussian_input(data, **kwargs)
46+
with open(file_name, 'w') as fp:
47+
fp.write(text)
48+
49+
50+
@Driver.register("gaussian")
51+
class GaussianDriver(Driver):
52+
"""Gaussian driver.
53+
54+
Note that "force" keyword must be added. If the number of atoms is large,
55+
"Geom=PrintInputOrient" should be added.
56+
57+
Parameters
58+
----------
59+
gaussian_exec : str, default=g16
60+
path to gaussian program
61+
**kwargs : dict
62+
other arguments to make input files. See :class:`SQMINFormat`
63+
64+
Examples
65+
--------
66+
Use B3LYP method to calculate potential energy of a methane molecule:
67+
68+
>>> labeled_system = system.predict(keywords="force b3lyp/6-31g**", driver="gaussian")
69+
>>> labeled_system['energies'][0]
70+
-1102.714590995794
71+
"""
72+
def __init__(self, gaussian_exec: str="g16", **kwargs: dict) -> None:
73+
self.gaussian_exec = gaussian_exec
74+
self.kwargs = kwargs
75+
76+
def label(self, data: dict) -> dict:
77+
"""Label a system data. Returns new data with energy, forces, and virials.
78+
79+
Parameters
80+
----------
81+
data : dict
82+
data with coordinates and atom types
83+
84+
Returns
85+
-------
86+
dict
87+
labeled data with energies and forces
88+
"""
89+
ori_system = dpdata.System(data=data)
90+
labeled_system = dpdata.LabeledSystem()
91+
with tempfile.TemporaryDirectory() as d:
92+
for ii, ss in enumerate(ori_system):
93+
inp_fn = os.path.join(d, "%d.gjf" % ii)
94+
out_fn = os.path.join(d, "%d.log" % ii)
95+
ss.to("gaussian/gjf", inp_fn, **self.kwargs)
96+
try:
97+
sp.check_output([*self.gaussian_exec.split(), inp_fn])
98+
except sp.CalledProcessError as e:
99+
with open(out_fn) as f:
100+
out = f.read()
101+
raise RuntimeError(
102+
"Run gaussian failed! Output:\n" + out
103+
) from e
104+
labeled_system.append(dpdata.LabeledSystem(out_fn, fmt="gaussian/log"))
105+
return labeled_system.data

tests/context.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,5 @@
33
import dpdata
44
import dpdata.md.water
55
import dpdata.md.msd
6+
import dpdata.gaussian.gjf
67
import dpdata.system

0 commit comments

Comments
 (0)