deepmodeling
diff --git a/docs/conf.py b/docs/conf.py
Lines changed: 3 additions & 0 deletions
diff --git a/dpdata/abacus/__init__.py b/dpdata/abacus/__init__.py
diff --git a/dpdata/abacus/md.py b/dpdata/abacus/md.py
Lines changed: 21 additions & 7 deletions
diff --git a/dpdata/abacus/scf.py b/dpdata/abacus/scf.py
Lines changed: 97 additions & 3 deletions
diff --git a/dpdata/amber/sqm.py b/dpdata/amber/sqm.py
Lines changed: 8 additions & 6 deletions
diff --git a/dpdata/ase_calculator.py b/dpdata/ase_calculator.py
Lines changed: 76 additions & 0 deletions
diff --git a/dpdata/bond_order_system.py b/dpdata/bond_order_system.py
Lines changed: 8 additions & 7 deletions
diff --git a/dpdata/cp2k/output.py b/dpdata/cp2k/output.py
Lines changed: 2 additions & 2 deletions
@@ -183,4 +183,7 @@ def setup(app):
 intersphinx_mapping = {
     "numpy": ("https://docs.scipy.org/doc/numpy/", None),
     "python": ("https://docs.python.org/", None),
+    "ase": ("https://wiki.fysik.dtu.dk/ase/", None),
+    "monty": ("https://guide.materialsvirtuallab.org/monty/", None),
+    "h5py": ("https://docs.h5py.org/en/stable/", None),
 }
@@ -33,8 +33,17 @@ def get_coord_dump_freq(inlines):
 def get_coords_from_dump(dumplines, natoms):
     nlines = len(dumplines)
     total_natoms = sum(natoms)
-    nframes_dump = int(nlines/(total_natoms + 13))
-    
+    calc_stress = False
+    if "VIRIAL" in dumplines[6]:
+        calc_stress = True
+    else:
+        assert("POSITIONS" in dumplines[6] and "FORCE" in dumplines[6]), "keywords 'POSITIONS' and 'FORCE' cannot be found in the 6th line. Please check."
+    nframes_dump = -1
+    if calc_stress:
+        nframes_dump = int(nlines/(total_natoms + 13))
+    else:
+        nframes_dump = int(nlines/(total_natoms + 9))
+    assert(nframes_dump > 0), "Number of lines in MD_dump file = %d. Number of atoms = %d. The MD_dump file is incomplete."%(nlines, total_natoms)
     cells = np.zeros([nframes_dump, 3, 3])
     stresses = np.zeros([nframes_dump, 3, 3])
     forces = np.zeros([nframes_dump, total_natoms, 3])
@@ -47,12 +56,17 @@ def get_coords_from_dump(dumplines, natoms):
             # read in LATTICE_VECTORS
             for ix in range(3):
                 cells[iframe, ix] = np.array([float(i) for i in re.split('\s+', dumplines[iline+3+ix])[-3:]]) * celldm
-                stresses[iframe, ix] = np.array([float(i) for i in re.split('\s+', dumplines[iline+7+ix])[-3:]])
+                if calc_stress:
+                    stresses[iframe, ix] = np.array([float(i) for i in re.split('\s+', dumplines[iline+7+ix])[-3:]])
             for iat in range(total_natoms):
-                coords[iframe, iat] = np.array([float(i) for i in re.split('\s+', dumplines[iline+11+iat])[-6:-3]])*celldm
-                forces[iframe, iat] = np.array([float(i) for i in re.split('\s+', dumplines[iline+11+iat])[-3:]])
+                if calc_stress:
+                    coords[iframe, iat] = np.array([float(i) for i in re.split('\s+', dumplines[iline+11+iat])[-6:-3]])*celldm
+                    forces[iframe, iat] = np.array([float(i) for i in re.split('\s+', dumplines[iline+11+iat])[-3:]])
+                else:
+                    coords[iframe, iat] = np.array([float(i) for i in re.split('\s+', dumplines[iline+7+iat])[-6:-3]])*celldm
+                    forces[iframe, iat] = np.array([float(i) for i in re.split('\s+', dumplines[iline+7+iat])[-3:]])
             iframe += 1
-    assert(iframe == nframes_dump)
+    assert(iframe == nframes_dump), "iframe=%d, nframe_dump=%d. Number of frames does not match number of lines in MD_dump."%(iframe, nframes_dump)
     cells *= bohr2ang
     coords *= bohr2ang
     stresses *= kbar2evperang3
@@ -66,7 +80,7 @@ def get_energy(outlines, ndump, dump_freq):
             if nenergy%dump_freq == 0:
                 energy.append(float(line.split()[-2]))
             nenergy+=1
-    assert(ndump == len(energy))
+    assert(ndump == len(energy)), "Number of total energies in running_md.log = %d. Number of frames in MD_dump = %d. Please check."%(len(energy), ndump)
     energy = np.array(energy)
     return energy
 
 
@@ -1,7 +1,7 @@
 import os,sys
 import numpy as np
 from ..unit import EnergyConversion, PressureConversion, LengthConversion
-
+import re
 bohr2ang = LengthConversion("bohr", "angstrom").value()
 ry2ev = EnergyConversion("rydberg", "eV").value()
 kbar2evperang3 = PressureConversion("kbar", "eV/angstrom^3").value()
@@ -16,10 +16,10 @@ def get_block (lines, keyword, skip = 0, nlines = None):
             found = True
             blk_idx = idx + 1 + skip
             line_idx = 0
-            while len(lines[blk_idx].split("\s+")) == 0:
+            while len(re.split("\s+", lines[blk_idx])) == 0:
                 blk_idx += 1
             while line_idx < nlines and blk_idx != len(lines):
-                if len(lines[blk_idx].split("\s+")) == 0 or lines[blk_idx] == "":
+                if len(re.split("\s+", lines[blk_idx])) == 0 or lines[blk_idx] == "":
                     blk_idx+=1
                     continue
                 ret.append(lines[blk_idx])
@@ -184,6 +184,100 @@ def get_frame (fname):
     # print("virial = ", data['virials'])
     return data
 
+def get_nele_from_stru(geometry_inlines):
+    """Count the species entries listed under ATOMIC_SPECIES in a STRU file.
+
+    Parameters
+    ----------
+    geometry_inlines : list of str
+        Lines of the STRU file (no trailing newline characters).
+
+    Returns
+    -------
+    int
+        Number of lines inside the ATOMIC_SPECIES section(s) that split
+        into at least three fields on runs of whitespace.
+
+    Raises
+    ------
+    AssertionError
+        If none of the known section keywords starts a line.
+    """
+    key_words_list = ["ATOMIC_SPECIES", "NUMERICAL_ORBITAL", "LATTICE_CONSTANT", "LATTICE_VECTORS", "ATOMIC_POSITIONS", "NUMERICAL_DESCRIPTOR"]
+    # (keyword, line index) of every line whose first token is a section keyword
+    sections = []
+    for iline, line in enumerate(geometry_inlines):
+        tokens = line.split()
+        if tokens and tokens[0] in key_words_list:
+            sections.append((tokens[0], iline))
+    assert len(sections) > 0, "No section keyword (e.g. 'ATOMIC_SPECIES') can be found in the STRU lines. Please check."
+
+    # Each section runs up to the next keyword line; the last one runs to
+    # the end of the input.
+    section_ends = [iline for _, iline in sections[1:]] + [len(geometry_inlines)]
+
+    nele = 0
+    for (keyword, start), end in zip(sections, section_ends):
+        if keyword != "ATOMIC_SPECIES":
+            continue
+        for line in geometry_inlines[start + 1:end]:
+            # re.split (unlike str.split) keeps the empty fields produced by
+            # leading/trailing whitespace, so a line such as "H 1 " still
+            # counts as three fields.
+            if len(re.split(r"\s+", line)) >= 3:
+                nele += 1
+    return nele
+
+def get_frame_from_stru(fname):
+    """Read a single unlabeled frame from a STRU file.
+
+    Parameters
+    ----------
+    fname : str or os.PathLike
+        Path of the STRU file.
+
+    Returns
+    -------
+    dict
+        Dictionary with the keys 'atom_names', 'atom_numbs', 'atom_types',
+        'cells', 'coords' and 'orig'.  'cells' and 'coords' get a leading
+        axis of length one; 'orig' is a zero vector of length 3.
+
+    Raises
+    ------
+    AssertionError
+        If `fname` is neither a string nor a path-like object.
+    """
+    assert isinstance(fname, (str, os.PathLike)), "fname must be a str or a path-like object."
+    with open(fname, 'r') as fp:
+        geometry_inlines = fp.read().split('\n')
+    nele = get_nele_from_stru(geometry_inlines)
+    # get_coords takes a list of input lines as its last argument; only an
+    # 'ntype' entry is supplied here -- presumably all it needs, TODO confirm.
+    inlines = ["ntype %d" %nele]
+    celldm, cell = get_cell(geometry_inlines)
+    atom_names, natoms, types, coords = get_coords(celldm, cell, geometry_inlines, inlines)
+    return {
+        'atom_names': atom_names,
+        'atom_numbs': natoms,
+        'atom_types': types,
+        'cells': cell[np.newaxis, :, :],
+        'coords': coords[np.newaxis, :, :],
+        'orig': np.zeros(3),
+    }
+
+def make_unlabeled_stru(data, frame_idx, pp_file=None, numerical_orbital=None, numerical_descriptor=None, mass=None):
+    """Render one frame of `data` as the text of a STRU file.
+
+    Parameters
+    ----------
+    data : dict
+        System data; the keys 'atom_names', 'atom_numbs', 'cells' and
+        'coords' are read.
+    frame_idx : int
+        Index of the frame in data['cells'] / data['coords'] to write.
+    pp_file : sequence of str, optional
+        One entry per element, written as the third field of its
+        ATOMIC_SPECIES line.  When omitted the field is left empty.
+    numerical_orbital : sequence of str, optional
+        One entry per element, written in a NUMERICAL_ORBITAL section.
+    numerical_descriptor : str, optional
+        Single entry written in a NUMERICAL_DESCRIPTOR section.
+    mass : sequence of numbers, optional
+        One mass per element.  When omitted, "1" is written for every
+        element.  Fractional masses are written as given (they used to be
+        truncated to integers).
+
+    Returns
+    -------
+    str
+        The complete STRU text.
+
+    Raises
+    ------
+    AssertionError
+        If a per-element argument is too short or has the wrong type.
+    """
+    atom_names = data['atom_names']
+    atom_numbs = data['atom_numbs']
+    nele = len(atom_names)
+    if mass is not None:
+        assert len(mass) >= nele, "mass must provide one entry per element."
+    if pp_file is not None:
+        assert len(pp_file) >= nele, "pp_file must provide one entry per element."
+
+    # Collect the pieces and join once at the end.
+    out = ["ATOMIC_SPECIES\n"]
+    for iele in range(nele):
+        # %.10g keeps integral masses unchanged ("12") while no longer
+        # truncating fractional ones (1.008 used to be written as "1").
+        mass_field = "%.10g" % mass[iele] if mass is not None else "1"
+        pp_field = "%s" % pp_file[iele] if pp_file is not None else ""
+        out.append(atom_names[iele] + " " + mass_field + " " + pp_field + "\n")
+    out.append("\n")
+
+    if numerical_orbital is not None:
+        assert(len(numerical_orbital) == nele)
+        out.append("NUMERICAL_ORBITAL\n")
+        for orbital in numerical_orbital:
+            out.append("%s\n"%orbital)
+        out.append("\n")
+
+    if numerical_descriptor is not None:
+        assert isinstance(numerical_descriptor, str)
+        out.append("NUMERICAL_DESCRIPTOR\n%s\n"%numerical_descriptor)
+        out.append("\n")
+
+    # The lattice constant is 1/bohr2ang; cells and coordinates are then
+    # written without any rescaling.
+    out.append("LATTICE_CONSTANT\n")
+    out.append(str(1/bohr2ang) + "\n\n")
+
+    out.append("LATTICE_VECTORS\n")
+    cell = data['cells'][frame_idx]
+    for ix in range(3):
+        for iy in range(3):
+            out.append(str(cell[ix][iy]) + " ")
+        out.append("\n")
+    out.append("\n")
+
+    out.append("ATOMIC_POSITIONS\n")
+    out.append("Cartesian    # Cartesian(Unit is LATTICE_CONSTANT)\n")
+    frame_coords = data['coords'][frame_idx]
+    natom_tot = 0
+    for iele in range(nele):
+        out.append(atom_names[iele] + "\n")
+        out.append("0.0\n")
+        out.append(str(atom_numbs[iele]) + "\n")
+        for _ in range(atom_numbs[iele]):
+            # the three trailing 1s are written unchanged for every atom
+            out.append("%.12f %.12f %.12f %d %d %d\n" % (frame_coords[natom_tot, 0], frame_coords[natom_tot, 1], frame_coords[natom_tot, 2], 1, 1, 1))
+            natom_tot += 1
+    assert(natom_tot == sum(atom_numbs))
+    return "".join(out)
+
 #if __name__ == "__main__":
 #    path = "/home/lrx/work/12_ABACUS_dpgen_interface/dpdata/dpdata/tests/abacus.scf"
 #    data = get_frame(path)
@@ -5,7 +5,6 @@
 kcal2ev = EnergyConversion("kcal_mol", "eV").value()
 
 START = 0
-READ_ENERGY = 1
 READ_CHARGE = 2
 READ_COORDS_START = 3
 READ_COORDS = 6
@@ -25,19 +24,19 @@ def parse_sqm_out(fname):
         flag = START
         for line in f:
             if line.startswith(" Total SCF energy"):
-                flag = READ_ENERGY
+                energy = float(line.strip().split()[-2])
+                energies = [energy]
             elif line.startswith("  Atom    Element       Mulliken Charge"):
                 flag = READ_CHARGE
+                charges = []
             elif line.startswith(" Total Mulliken Charge"):
                 flag = START
             elif line.startswith(" Final Structure"):
                 flag = READ_COORDS_START
+                coords = []
             elif line.startswith("QMMM: Forces on QM atoms"):
                 flag = READ_FORCES
-            elif flag == READ_ENERGY:
-                energy = float(line.strip().split()[-2])
-                energies.append(energy)
-                flag = START
+                forces = []
             elif flag == READ_CHARGE:
                 ls = line.strip().split()
                 atom_symbols.append(ls[-2])
@@ -50,6 +49,9 @@ def parse_sqm_out(fname):
                     flag = START
             elif flag == READ_FORCES:
                 ll = line.strip()
+                if not ll.startswith("QMMM: Atm "):
+                    flag = START
+                    continue
                 forces.append([float(ll[-60:-40]), float(ll[-40:-20]), float(ll[-20:])])
                 if len(forces) == len(charges):
                     flag = START
 
@@ -0,0 +1,76 @@
+from typing import List, Optional, TYPE_CHECKING
+
+from ase.calculators.calculator import (
+    Calculator, all_changes, PropertyNotImplementedError
+)
+
+import dpdata
+from .driver import Driver
+
+if TYPE_CHECKING:
+    from ase import Atoms
+
+
+class DPDataCalculator(Calculator):
+    """Implementation of ASE deepmd calculator based on a driver.
+
+    Parameters
+    ----------
+    driver : Driver
+        dpdata driver
+    **kwargs
+        forwarded unchanged to ``Calculator.__init__``
+    """
+
+    name = "dpdata"
+    implemented_properties = [
+        "energy", "free_energy", "forces", "virial", "stress"]
+
+    def __init__(
+        self,
+        driver: Driver,
+        **kwargs
+    ) -> None:
+        # NOTE(review): the label is the name of the ``Driver`` class itself,
+        # not of the concrete driver instance -- confirm this is intended.
+        Calculator.__init__(self, label=Driver.__name__, **kwargs)
+        self.driver = driver
+
+    def calculate(
+        self,
+        atoms: Optional["Atoms"] = None,
+        properties: List[str] = ["energy", "forces"],
+        system_changes: List[str] = all_changes,
+    ):
+        """Run calculation with a driver.
+
+        Parameters
+        ----------
+        atoms : Optional[Atoms], optional
+            atoms object to run the calculation on, by default None;
+            when None, the atoms already stored on the calculator are used
+        properties : List[str], optional
+            only inspected for "stress"; energy, forces and (if available)
+            virial are always stored, by default ["energy", "forces"]
+        system_changes : List[str], optional
+            unused, only for function signature compatibility, by default all_changes
+
+        Raises
+        ------
+        PropertyNotImplementedError
+            if "stress" is requested but the system is not periodic in any
+            direction or the driver returned no virials
+        """
+        if atoms is not None:
+            self.atoms = atoms.copy()
+
+        system = dpdata.System(self.atoms, fmt="ase/structure")
+        data = system.predict(driver=self.driver).data
+
+        energy = data['energies'][0]
+        self.results['energy'] = energy
+        # see https://gitlab.com/ase/ase/-/merge_requests/2485
+        self.results['free_energy'] = energy
+        self.results['forces'] = data['forces'][0]
+
+        virial = None
+        if 'virials' in data:
+            virial = data['virials'][0].reshape(3, 3)
+            self.results['virial'] = virial
+
+        # convert virial into stress for lattice relaxation
+        if "stress" in properties:
+            # Use self.atoms rather than the argument: ``atoms`` may be None
+            # when the calculation runs on the previously stored structure.
+            if virial is None or not sum(self.atoms.get_pbc()) > 0:
+                raise PropertyNotImplementedError
+            # the usual convention (tensile stress is positive)
+            # stress = -virial / volume, symmetrized
+            stress = -0.5 * (virial + virial.T) / self.atoms.get_volume()
+            # Voigt notation
+            self.results['stress'] = stress.flat[[0, 4, 8, 5, 2, 1]]
@@ -1,15 +1,13 @@
 #%%
 # Bond Order System
-from dpdata.system import System, LabeledSystem, check_System, load_format
+import numpy as np
+from dpdata.system import System, LabeledSystem, load_format, DataType, Axis
 import dpdata.rdkit.utils
 from dpdata.rdkit.sanitize import Sanitizer, SanitizeError
 from copy import deepcopy
 from rdkit.Chem import Conformer
 # import dpdata.rdkit.mol2
 
-def check_BondOrderSystem(data):
-    check_System(data)
-    assert ('bonds' in data.keys())
 
 class BondOrderSystem(System):
     '''
@@ -23,6 +21,11 @@ class BondOrderSystem(System):
                                         1 - single bond, 2 - double bond, 3 - triple bond, 1.5 - aromatic bond
         - `d_example['formal_charges']` : a numpy array of size 5 x 1
     '''
+    DTYPES = System.DTYPES + (
+        DataType("bonds", np.ndarray, (Axis.NBONDS, 3)),
+        DataType("formal_charges", np.ndarray, (Axis.NATOMS,)),
+    )
+
     def __init__(self,
                  file_name = None,
                  fmt = 'auto',
@@ -86,6 +89,7 @@ def __init__(self,
 
         if type_map:
             self.apply_type_map(type_map)
+        self.check_data()
 
     def from_fmt_obj(self, fmtobj, file_name, **kwargs):
         mol = fmtobj.from_bond_order_system(file_name, **kwargs)
@@ -104,9 +108,6 @@ def to_fmt_obj(self, fmtobj, *args, **kwargs):
             self.rdkit_mol.AddConformer(conf, assignId=True)
         return fmtobj.to_bond_order_system(self.data, self.rdkit_mol, *args, **kwargs)
 
-    def __repr__(self):
-        return self.__str__()
-
     def __str__(self):
         '''
             A brief summary of the system
 
@@ -3,7 +3,7 @@
 import re
 from collections import OrderedDict
 
-from scipy.constants.constants import R
+from scipy.constants import R
 from .cell import cell_to_low_triangle
 from ..unit import EnergyConversion, LengthConversion, ForceConversion, PressureConversion
 
@@ -268,7 +268,7 @@ def handle_single_xyz_frame(self, lines):
         #info_dict['atom_types'] = np.asarray(atom_types_list)
         info_dict['coords'] = np.asarray([coords_list]).astype('float32')
         info_dict['energies'] = np.array([energy]).astype('float32')
-        info_dict['orig']=[0,0,0]
+        info_dict['orig'] = np.zeros(3)
         return info_dict
 
 #%%
Original file line number	Diff line number	Diff line change
`@@ -183,4 +183,7 @@ def setup(app):`
`183`	`183`	`intersphinx_mapping = {`
`184`	`184`	`"numpy": ("https://docs.scipy.org/doc/numpy/", None),`
`185`	`185`	`"python": ("https://docs.python.org/", None),`
	`186`	`+ "ase": ("https://wiki.fysik.dtu.dk/ase/", None),`
	`187`	`+ "monty": ("https://guide.materialsvirtuallab.org/monty/", None),`
	`188`	`+ "h5py": ("https://docs.h5py.org/en/stable/", None),`
`186`	`189`	`}`