deepmodeling
diff --git a/‎dpdata/gaussian/gjf.py
Lines changed: 239 additions & 0 deletions b/‎dpdata/gaussian/gjf.py
Lines changed: 239 additions & 0 deletions
diff --git a/‎dpdata/plugins/dftbplus.py b/‎dpdata/plugins/dftbplus.py
diff --git a/‎dpdata/plugins/gaussian.py
Lines changed: 84 additions & 0 deletions b/‎dpdata/plugins/gaussian.py
Lines changed: 84 additions & 0 deletions
diff --git a/‎tests/context.py
Lines changed: 1 addition & 0 deletions b/‎tests/context.py
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1,239 @@
+# The initial code of this file is based on
+# https://github.com/deepmodeling/dpgen/blob/0767dce7cad29367edb2e4a55fd0d8724dbda642/dpgen/generator/lib/gaussian.py#L1-L190
+# under LGPL 3.0 license
+"""Generate Gaussian input file."""
+
+from typing import List, Tuple, Union
+import uuid
+import itertools
+import warnings
+import numpy as np
+from scipy.sparse import csr_matrix
+from scipy.sparse.csgraph import connected_components
+try:
+    from openbabel import openbabel
+except ImportError:
+    try:
+        import openbabel
+    except ImportError:
+        openbabel = None
+from dpdata.periodic_table import Element
+
+
+
+def _crd2frag(symbols: List[str], crds: np.ndarray) -> Tuple[int, List[int]]:
+    """Split a set of atoms into bonded fragments.
+
+    Parameters
+    ----------
+    symbols : list[str]
+        element symbols, one per atom; virtual elements are not supported
+    crds : np.ndarray
+        atomic coordinates, shape: (N, 3)
+
+    Returns
+    -------
+    frag_numb : int
+        number of fragments
+    frag_index : np.ndarray
+        fragment index that each atom belongs to, as returned by
+        :func:`scipy.sparse.csgraph.connected_components`
+
+    Raises
+    ------
+    ImportError
+        if Open Babel is not installed
+
+    Notes
+    -----
+    Bond connectivity is perceived by Open Babel. The threshold
+    is the sum of covalent radii with a slight tolerance (0.45 A), so the
+    detected connectivity is only approximate.
+
+    PBC support is removed from this method as Gaussian does not support PBC calculation.
+    """
+    if openbabel is None:
+        raise ImportError("Open Babel (Python interface) should be installed to detect fragmentation!")
+    n_atoms = len(symbols)
+
+    # Build an Open Babel molecule and let it work out the bonds.
+    molecule = openbabel.OBMol()
+    molecule.BeginModify()
+    for atom_id, (element, xyz) in enumerate(zip(symbols, crds.astype(np.float64))):
+        atomic_number = Element(element).Z
+        ob_atom = molecule.NewAtom(atom_id)
+        ob_atom.SetAtomicNum(int(atomic_number))
+        ob_atom.SetVector(*xyz)
+    molecule.ConnectTheDots()
+    molecule.PerceiveBondOrders()
+    molecule.EndModify()
+
+    # Collect every bond in both directions as (row, column, bond order).
+    edges = []
+    for bond_id in range(molecule.NumBonds()):
+        ob_bond = molecule.GetBond(bond_id)
+        begin = ob_bond.GetBeginAtom().GetId()
+        end = ob_bond.GetEndAtom().GetId()
+        order = ob_bond.GetBondOrder()
+        edges += [[begin, end, order], [end, begin, order]]
+    # the reshape keeps a (0, 3) table when no bond was found
+    edge_table = np.array(edges, ndmin=2).reshape((-1, 3))
+    rows, cols, weights = edge_table[:, 0], edge_table[:, 1], edge_table[:, 2]
+    adjacency = csr_matrix((weights, (rows, cols)), shape=(n_atoms, n_atoms))
+
+    # Each connected component of the (undirected) bond graph is one fragment.
+    return connected_components(adjacency, directed=False)
+
+
+def detect_multiplicity(symbols: np.ndarray) -> int:
+    """Find the minimal multiplicity of the given molecules.
+
+    Only neutral species (charge=0) are considered.
+
+    Parameters
+    ----------
+    symbols : np.ndarray
+        element symbols; virtual elements are not supported. Any sequence
+        of strings is accepted and converted to an array.
+
+    Returns
+    -------
+    int
+        spin multiplicity: 3 for an oxygen molecule, otherwise 1 for an
+        even and 2 for an odd total number of electrons
+    """
+    # Coerce to an array so that the comparison below is elementwise even
+    # when a plain list or tuple is given.
+    symbols = np.asarray(symbols)
+    # oxygen -> 3
+    if symbols.size == 2 and np.count_nonzero(symbols == "O") == 2:
+        return 3
+    # calculates the total number of electrons, assumes they are paired as much as possible
+    n_total = sum(Element(s).Z for s in symbols)
+    return n_total % 2 + 1
+
+
+def make_gaussian_input(
+        sys_data: dict,
+        keywords: Union[str, List[str]],
+        multiplicity: Union[str ,int] = "auto",
+        charge: int = 0,
+        fragment_guesses: bool = False,
+        basis_set: str = None,
+        keywords_high_multiplicity: str = None,
+        nproc: int = 1,
+        ) -> str:
+    """Make gaussian input file.
+
+    Only the first frame of the system (``sys_data['coords'][0]``) is written.
+
+    Parameters
+    ----------
+    sys_data : dict
+        system data
+    keywords : str or list[str]
+        Gaussian keywords, e.g. force b3lyp/6-31g**. If a list,
+        run multiple steps
+    multiplicity : str or int, default=auto
+        spin multiplicity state. It can be a number. If auto,
+        multiplicity will be detected automatically, with the
+        following rules:
+            fragment_guesses=True
+                multiplicity will +1 for each radical, and +2
+                for each oxygen molecule
+            fragment_guesses=False
+                multiplicity will be 1 or 2, but +2 for each
+                oxygen molecule
+    charge : int, default=0
+        molecule charge. Only used when charge is not provided
+        by the system
+    fragment_guesses : bool, default=False
+        initial guess generated from fragment guesses. If True,
+        multiplicity should be auto
+    basis_set : str, default=None
+        custom basis set
+    keywords_high_multiplicity : str, default=None
+        keywords for points with multiple radicals. multiplicity
+        should be auto. If not set, fallback to normal keywords
+    nproc : int, default=1
+        Number of CPUs to use
+
+    Returns
+    -------
+    str
+        gjf output string
+
+    Raises
+    ------
+    RuntimeError
+        if multiplicity is neither an integer nor "auto"
+    ImportError
+        if the multiplicity has to be detected automatically and
+        Open Babel is not installed
+    """
+    coordinates = sys_data['coords'][0]
+    atom_names = sys_data['atom_names']
+    atom_numbs = sys_data['atom_numbs']
+    atom_types = sys_data['atom_types']
+    # get atom symbols list
+    symbols = [atom_names[atom_type] for atom_type in atom_types]
+
+    # the charge stored in the system takes precedence over the argument
+    if 'charge' in sys_data:
+        charge = sys_data['charge']
+
+    if isinstance(multiplicity, int):
+        mult_auto = False
+    elif multiplicity == 'auto':
+        mult_auto = True
+    else:
+        raise RuntimeError('The keyword "multiplicity" is illegal.')
+
+    # Initial guess generated from fragment guesses
+    # New feature of Gaussian 16
+    use_fragment_guesses = bool(fragment_guesses)
+    if use_fragment_guesses and not mult_auto:
+        # fragment guesses need per-fragment multiplicities
+        warnings.warn("Automatically set multiplicity to auto!")
+        mult_auto = True
+
+    chargekeywords_frag = None
+    if mult_auto:
+        frag_numb, frag_index = _crd2frag(symbols, coordinates)
+        if frag_numb == 1:
+            # a single fragment gains nothing from a fragment guess
+            use_fragment_guesses = False
+        symbol_array = np.array(symbols)
+        mult_frags = [
+            detect_multiplicity(symbol_array[frag_index == i])
+            for i in range(frag_numb)
+        ]
+        # Count radicals once, up front: the count is needed by both
+        # branches below and by the high-multiplicity keyword switch.
+        mult_array = np.array(mult_frags)
+        n_radicals = np.count_nonzero(mult_array == 2)
+        if use_fragment_guesses:
+            multiplicity = sum(mult_frags) - frag_numb + 1 - charge % 2
+            chargekeywords_frag = "%d %d" % (charge, multiplicity) + \
+                ''.join([' %d %d' % (charge, mult_frag)
+                         for mult_frag in mult_frags])
+        else:
+            multiplicity = 1 + \
+                n_radicals % 2 + \
+                np.count_nonzero(mult_array == 3) * 2 - charge % 2
+
+        if keywords_high_multiplicity is not None and n_radicals >= 2:
+            # at least 2 radicals
+            keywords = keywords_high_multiplicity
+
+    # work on a private list so that the caller's keywords are never modified
+    if isinstance(keywords, str):
+        keywords = [keywords]
+    else:
+        keywords = list(keywords)
+
+    # keywords, e.g., force b3lyp/6-31g**
+    if use_fragment_guesses:
+        keywords[0] = '{} guess=fragment={}'.format(
+            keywords[0], frag_numb)
+
+    # multiple steps share one checkpoint file
+    chkkeywords = []
+    if len(keywords) > 1:
+        chkkeywords.append('%chk={}.chk'.format(str(uuid.uuid1())))
+
+    nprockeywords = '%nproc={:d}'.format(nproc)
+    # use formula as title
+    titlekeywords = ''.join(["{}{}".format(symbol, numb) for symbol, numb in
+                             zip(atom_names, atom_numbs)])
+    chargekeywords = '{} {}'.format(charge, multiplicity)
+
+    buff = [*chkkeywords, nprockeywords, '#{}'.format(
+        keywords[0]), '', titlekeywords, '', (chargekeywords_frag if use_fragment_guesses else chargekeywords)]
+
+    for ii, (symbol, coordinate) in enumerate(zip(symbols, coordinates)):
+        if use_fragment_guesses:
+            # Gaussian numbers fragments from 1
+            buff.append("%s(Fragment=%d) %f %f %f" %
+                        (symbol, frag_index[ii] + 1, *coordinate))
+        else:
+            buff.append("%s %f %f %f" % (symbol, *coordinate))
+    if basis_set is not None:
+        # custom basis set
+        buff.extend(['', basis_set, ''])
+    # every further step is appended as a --link1-- section
+    for kw in itertools.islice(keywords, 1, None):
+        buff.extend(['\n--link1--', *chkkeywords, nprockeywords,
+                    '#{}'.format(kw), '', titlekeywords, '', chargekeywords, ''])
+    buff.append('\n')
+    return '\n'.join(buff)
@@ -1,5 +1,11 @@
+import os
+import tempfile
+import subprocess as sp
+
 import dpdata.gaussian.log
+import dpdata.gaussian.gjf
 from dpdata.format import Format
+from dpdata.driver import Driver
 
 
 @Format.register("gaussian/log")
@@ -19,3 +25,81 @@ def from_labeled_system(self, file_name, md=False, **kwargs):
 class GaussianMDFormat(Format):
     def from_labeled_system(self, file_name, **kwargs):
         return GaussianLogFormat().from_labeled_system(file_name, md=True)
+
+
+@Format.register("gaussian/gjf")
+class GaussiaGJFFormat(Format):
+    """Format plugin that writes Gaussian input (gjf) files."""
+    def to_system(self, data: dict, file_name: str, **kwargs):
+        """Write a system to a Gaussian input file.
+
+        Parameters
+        ----------
+        data : dict
+            system data
+        file_name : str
+            path of the input file to write
+        **kwargs : dict
+            Other parameters to make input files, forwarded unchanged to
+            :meth:`dpdata.gaussian.gjf.make_gaussian_input`
+        """
+        make_input = dpdata.gaussian.gjf.make_gaussian_input
+        gjf_content = make_input(data, **kwargs)
+        with open(file_name, 'w') as gjf_file:
+            gjf_file.write(gjf_content)
+
+
+@Driver.register("gaussian")
+class GaussianDriver(Driver):
+    """Gaussian driver.
+
+    Note that "force" keyword must be added. If the number of atoms is large,
+    "Geom=PrintInputOrient" should be added.
+
+    Parameters
+    ----------
+    gaussian_exec : str, default=g16
+        path to gaussian program. The string is split on whitespace, so
+        extra command line arguments may follow the program name
+    **kwargs : dict
+        other arguments to make input files. They are passed to the
+        "gaussian/gjf" format, see
+        :meth:`dpdata.gaussian.gjf.make_gaussian_input`
+
+    Examples
+    --------
+    Use B3LYP method to calculate potential energy of a methane molecule:
+
+    >>> labeled_system = system.predict(keywords="force b3lyp/6-31g**", driver="gaussian")
+    >>> labeled_system['energies'][0]
+    -1102.714590995794
+    """
+    def __init__(self, gaussian_exec: str="g16", **kwargs: dict) -> None:
+        self.gaussian_exec = gaussian_exec
+        self.kwargs = kwargs
+
+    def label(self, data: dict) -> dict:
+        """Label a system data by running Gaussian on every frame.
+
+        Each frame is written to a temporary gjf file, Gaussian is run on
+        it, and the resulting log file is parsed with the "gaussian/log"
+        format.
+
+        Parameters
+        ----------
+        data : dict
+            data with coordinates and atom types
+
+        Returns
+        -------
+        dict
+            labeled data with energies and forces
+
+        Raises
+        ------
+        RuntimeError
+            if Gaussian exits with a non-zero status
+        """
+        ori_system = dpdata.System(data=data)
+        labeled_system = dpdata.LabeledSystem()
+        with tempfile.TemporaryDirectory() as d:
+            for ii, ss in enumerate(ori_system):
+                inp_fn = os.path.join(d, "%d.gjf" % ii)
+                # the log file is expected next to the input file
+                out_fn = os.path.join(d, "%d.log" % ii)
+                ss.to("gaussian/gjf", inp_fn, **self.kwargs)
+                try:
+                    sp.check_output([*self.gaussian_exec.split(), inp_fn])
+                except sp.CalledProcessError as e:
+                    try:
+                        with open(out_fn) as f:
+                            out = f.read()
+                    except OSError:
+                        # Gaussian may fail before any log is written; fall
+                        # back on the captured stdout so that the original
+                        # failure is not masked by a missing-file error.
+                        out = (e.output or b"").decode(errors="replace")
+                    raise RuntimeError(
+                        "Run gaussian failed! Output:\n" + out
+                        ) from e
+                labeled_system.append(dpdata.LabeledSystem(out_fn, fmt="gaussian/log"))
+        return labeled_system.data
@@ -3,4 +3,5 @@
 import dpdata
 import dpdata.md.water
 import dpdata.md.msd
+import dpdata.gaussian.gjf
 import dpdata.system