Skip to content

Commit 856fa77

Browse files
author
Han Wang
committed
Merge branch 'devel'
2 parents 7029230 + 0c67440 commit 856fa77

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+6221
-427
lines changed

docs/conf.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,4 +183,7 @@ def setup(app):
183183
intersphinx_mapping = {
184184
"numpy": ("https://docs.scipy.org/doc/numpy/", None),
185185
"python": ("https://docs.python.org/", None),
186+
"ase": ("https://wiki.fysik.dtu.dk/ase/", None),
187+
"monty": ("https://guide.materialsvirtuallab.org/monty/", None),
188+
"h5py": ("https://docs.h5py.org/en/stable/", None),
186189
}

dpdata/abacus/__init__.py

Whitespace-only changes.

dpdata/abacus/md.py

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,17 @@ def get_coord_dump_freq(inlines):
3333
def get_coords_from_dump(dumplines, natoms):
3434
nlines = len(dumplines)
3535
total_natoms = sum(natoms)
36-
nframes_dump = int(nlines/(total_natoms + 13))
37-
36+
calc_stress = False
37+
if "VIRIAL" in dumplines[6]:
38+
calc_stress = True
39+
else:
40+
assert("POSITIONS" in dumplines[6] and "FORCE" in dumplines[6]), "keywords 'POSITIONS' and 'FORCE' cannot be found in the 6th line. Please check."
41+
nframes_dump = -1
42+
if calc_stress:
43+
nframes_dump = int(nlines/(total_natoms + 13))
44+
else:
45+
nframes_dump = int(nlines/(total_natoms + 9))
46+
assert(nframes_dump > 0), "Number of lines in MD_dump file = %d. Number of atoms = %d. The MD_dump file is incomplete."%(nlines, total_natoms)
3847
cells = np.zeros([nframes_dump, 3, 3])
3948
stresses = np.zeros([nframes_dump, 3, 3])
4049
forces = np.zeros([nframes_dump, total_natoms, 3])
@@ -47,12 +56,17 @@ def get_coords_from_dump(dumplines, natoms):
4756
# read in LATTICE_VECTORS
4857
for ix in range(3):
4958
cells[iframe, ix] = np.array([float(i) for i in re.split('\s+', dumplines[iline+3+ix])[-3:]]) * celldm
50-
stresses[iframe, ix] = np.array([float(i) for i in re.split('\s+', dumplines[iline+7+ix])[-3:]])
59+
if calc_stress:
60+
stresses[iframe, ix] = np.array([float(i) for i in re.split('\s+', dumplines[iline+7+ix])[-3:]])
5161
for iat in range(total_natoms):
52-
coords[iframe, iat] = np.array([float(i) for i in re.split('\s+', dumplines[iline+11+iat])[-6:-3]])*celldm
53-
forces[iframe, iat] = np.array([float(i) for i in re.split('\s+', dumplines[iline+11+iat])[-3:]])
62+
if calc_stress:
63+
coords[iframe, iat] = np.array([float(i) for i in re.split('\s+', dumplines[iline+11+iat])[-6:-3]])*celldm
64+
forces[iframe, iat] = np.array([float(i) for i in re.split('\s+', dumplines[iline+11+iat])[-3:]])
65+
else:
66+
coords[iframe, iat] = np.array([float(i) for i in re.split('\s+', dumplines[iline+7+iat])[-6:-3]])*celldm
67+
forces[iframe, iat] = np.array([float(i) for i in re.split('\s+', dumplines[iline+7+iat])[-3:]])
5468
iframe += 1
55-
assert(iframe == nframes_dump)
69+
assert(iframe == nframes_dump), "iframe=%d, nframe_dump=%d. Number of frames does not match number of lines in MD_dump."%(iframe, nframes_dump)
5670
cells *= bohr2ang
5771
coords *= bohr2ang
5872
stresses *= kbar2evperang3
@@ -66,7 +80,7 @@ def get_energy(outlines, ndump, dump_freq):
6680
if nenergy%dump_freq == 0:
6781
energy.append(float(line.split()[-2]))
6882
nenergy+=1
69-
assert(ndump == len(energy))
83+
assert(ndump == len(energy)), "Number of total energies in running_md.log = %d. Number of frames in MD_dump = %d. Please check."%(len(energy), ndump)
7084
energy = np.array(energy)
7185
return energy
7286

dpdata/abacus/scf.py

Lines changed: 97 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import os,sys
22
import numpy as np
33
from ..unit import EnergyConversion, PressureConversion, LengthConversion
4-
4+
import re
55
bohr2ang = LengthConversion("bohr", "angstrom").value()
66
ry2ev = EnergyConversion("rydberg", "eV").value()
77
kbar2evperang3 = PressureConversion("kbar", "eV/angstrom^3").value()
@@ -16,10 +16,10 @@ def get_block (lines, keyword, skip = 0, nlines = None):
1616
found = True
1717
blk_idx = idx + 1 + skip
1818
line_idx = 0
19-
while len(lines[blk_idx].split("\s+")) == 0:
19+
while len(re.split("\s+", lines[blk_idx])) == 0:
2020
blk_idx += 1
2121
while line_idx < nlines and blk_idx != len(lines):
22-
if len(lines[blk_idx].split("\s+")) == 0 or lines[blk_idx] == "":
22+
if len(re.split("\s+", lines[blk_idx])) == 0 or lines[blk_idx] == "":
2323
blk_idx+=1
2424
continue
2525
ret.append(lines[blk_idx])
@@ -184,6 +184,100 @@ def get_frame (fname):
184184
# print("virial = ", data['virials'])
185185
return data
186186

187+
def get_nele_from_stru(geometry_inlines):
188+
key_words_list = ["ATOMIC_SPECIES", "NUMERICAL_ORBITAL", "LATTICE_CONSTANT", "LATTICE_VECTORS", "ATOMIC_POSITIONS", "NUMERICAL_DESCRIPTOR"]
189+
keyword_sequence = []
190+
keyword_line_index = []
191+
atom_names = []
192+
atom_numbs = []
193+
for iline, line in enumerate(geometry_inlines):
194+
if line.split() == []:
195+
continue
196+
have_key_word = False
197+
for keyword in key_words_list:
198+
if keyword in line and keyword == line.split()[0]:
199+
keyword_sequence.append(keyword)
200+
keyword_line_index.append(iline)
201+
assert(len(keyword_line_index) == len(keyword_sequence))
202+
assert(len(keyword_sequence) > 0)
203+
keyword_line_index.append(len(geometry_inlines))
204+
205+
nele = 0
206+
for idx, keyword in enumerate(keyword_sequence):
207+
if keyword == "ATOMIC_SPECIES":
208+
for iline in range(keyword_line_index[idx]+1, keyword_line_index[idx+1]):
209+
if len(re.split("\s+", geometry_inlines[iline])) >= 3:
210+
nele += 1
211+
return nele
212+
213+
def get_frame_from_stru(fname):
214+
assert(type(fname) == str)
215+
with open(fname, 'r') as fp:
216+
geometry_inlines = fp.read().split('\n')
217+
nele = get_nele_from_stru(geometry_inlines)
218+
inlines = ["ntype %d" %nele]
219+
celldm, cell = get_cell(geometry_inlines)
220+
atom_names, natoms, types, coords = get_coords(celldm, cell, geometry_inlines, inlines)
221+
data = {}
222+
data['atom_names'] = atom_names
223+
data['atom_numbs'] = natoms
224+
data['atom_types'] = types
225+
data['cells'] = cell[np.newaxis, :, :]
226+
data['coords'] = coords[np.newaxis, :, :]
227+
data['orig'] = np.zeros(3)
228+
229+
return data
230+
231+
def make_unlabeled_stru(data, frame_idx, pp_file=None, numerical_orbital=None, numerical_descriptor=None, mass=None):
232+
out = "ATOMIC_SPECIES\n"
233+
for iele in range(len(data['atom_names'])):
234+
out += data['atom_names'][iele] + " "
235+
if mass is not None:
236+
out += "%d "%mass[iele]
237+
else:
238+
out += "1 "
239+
if pp_file is not None:
240+
out += "%s\n"%pp_file[iele]
241+
else:
242+
out += "\n"
243+
out += "\n"
244+
245+
if numerical_orbital is not None:
246+
assert(len(numerical_orbital) == len(data['atom_names']))
247+
out += "NUMERICAL_ORBITAL\n"
248+
for iele in range(len(numerical_orbital)):
249+
out += "%s\n"%numerical_orbital[iele]
250+
out += "\n"
251+
252+
if numerical_descriptor is not None:
253+
assert(type(numerical_descriptor) == str)
254+
out += "NUMERICAL_DESCRIPTOR\n%s\n"%numerical_descriptor
255+
out += "\n"
256+
257+
out += "LATTICE_CONSTANT\n"
258+
out += str(1/bohr2ang) + "\n\n"
259+
260+
out += "LATTICE_VECTORS\n"
261+
for ix in range(3):
262+
for iy in range(3):
263+
out += str(data['cells'][frame_idx][ix][iy]) + " "
264+
out += "\n"
265+
out += "\n"
266+
267+
out += "ATOMIC_POSITIONS\n"
268+
out += "Cartesian # Cartesian(Unit is LATTICE_CONSTANT)\n"
269+
#ret += "\n"
270+
natom_tot = 0
271+
for iele in range(len(data['atom_names'])):
272+
out += data['atom_names'][iele] + "\n"
273+
out += "0.0\n"
274+
out += str(data['atom_numbs'][iele]) + "\n"
275+
for iatom in range(data['atom_numbs'][iele]):
276+
out += "%.12f %.12f %.12f %d %d %d\n" % (data['coords'][frame_idx][natom_tot, 0], data['coords'][frame_idx][natom_tot, 1], data['coords'][frame_idx][natom_tot, 2], 1, 1, 1)
277+
natom_tot += 1
278+
assert(natom_tot == sum(data['atom_numbs']))
279+
return out
280+
187281
#if __name__ == "__main__":
188282
# path = "/home/lrx/work/12_ABACUS_dpgen_interface/dpdata/dpdata/tests/abacus.scf"
189283
# data = get_frame(path)

dpdata/amber/sqm.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
kcal2ev = EnergyConversion("kcal_mol", "eV").value()
66

77
START = 0
8-
READ_ENERGY = 1
98
READ_CHARGE = 2
109
READ_COORDS_START = 3
1110
READ_COORDS = 6
@@ -25,19 +24,19 @@ def parse_sqm_out(fname):
2524
flag = START
2625
for line in f:
2726
if line.startswith(" Total SCF energy"):
28-
flag = READ_ENERGY
27+
energy = float(line.strip().split()[-2])
28+
energies = [energy]
2929
elif line.startswith(" Atom Element Mulliken Charge"):
3030
flag = READ_CHARGE
31+
charges = []
3132
elif line.startswith(" Total Mulliken Charge"):
3233
flag = START
3334
elif line.startswith(" Final Structure"):
3435
flag = READ_COORDS_START
36+
coords = []
3537
elif line.startswith("QMMM: Forces on QM atoms"):
3638
flag = READ_FORCES
37-
elif flag == READ_ENERGY:
38-
energy = float(line.strip().split()[-2])
39-
energies.append(energy)
40-
flag = START
39+
forces = []
4140
elif flag == READ_CHARGE:
4241
ls = line.strip().split()
4342
atom_symbols.append(ls[-2])
@@ -50,6 +49,9 @@ def parse_sqm_out(fname):
5049
flag = START
5150
elif flag == READ_FORCES:
5251
ll = line.strip()
52+
if not ll.startswith("QMMM: Atm "):
53+
flag = START
54+
continue
5355
forces.append([float(ll[-60:-40]), float(ll[-40:-20]), float(ll[-20:])])
5456
if len(forces) == len(charges):
5557
flag = START

dpdata/ase_calculator.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
from typing import List, Optional, TYPE_CHECKING
2+
3+
from ase.calculators.calculator import (
4+
Calculator, all_changes, PropertyNotImplementedError
5+
)
6+
7+
import dpdata
8+
from .driver import Driver
9+
10+
if TYPE_CHECKING:
11+
from ase import Atoms
12+
13+
14+
class DPDataCalculator(Calculator):
15+
"""Implementation of ASE deepmd calculator based on a driver.
16+
17+
Parameters
18+
----------
19+
driver : Driver
20+
dpdata driver
21+
"""
22+
23+
name = "dpdata"
24+
implemented_properties = [
25+
"energy", "free_energy", "forces", "virial", "stress"]
26+
27+
def __init__(
28+
self,
29+
driver: Driver,
30+
**kwargs
31+
) -> None:
32+
Calculator.__init__(self, label=Driver.__name__, **kwargs)
33+
self.driver = driver
34+
35+
def calculate(
36+
self,
37+
atoms: Optional["Atoms"] = None,
38+
properties: List[str] = ["energy", "forces"],
39+
system_changes: List[str] = all_changes,
40+
):
41+
"""Run calculation with a driver.
42+
43+
Parameters
44+
----------
45+
atoms : Optional[Atoms], optional
46+
atoms object to run the calculation on, by default None
47+
properties : List[str], optional
48+
unused, only for function signature compatibility,
49+
by default ["energy", "forces"]
50+
system_changes : List[str], optional
51+
unused, only for function signature compatibility, by default all_changes
52+
"""
53+
if atoms is not None:
54+
self.atoms = atoms.copy()
55+
56+
system = dpdata.System(self.atoms, fmt="ase/structure")
57+
data = system.predict(driver=self.driver).data
58+
59+
self.results['energy'] = data['energies'][0]
60+
# see https://gitlab.com/ase/ase/-/merge_requests/2485
61+
self.results['free_energy'] = data['energies'][0]
62+
self.results['forces'] = data['forces'][0]
63+
if 'virials' in data:
64+
self.results['virial'] = data['virials'][0].reshape(3, 3)
65+
66+
# convert virial into stress for lattice relaxation
67+
if "stress" in properties:
68+
if sum(atoms.get_pbc()) > 0:
69+
# the usual convention (tensile stress is positive)
70+
# stress = -virial / volume
71+
stress = -0.5 * (data['virials'][0].copy() + data['virials'][0].copy().T) / \
72+
atoms.get_volume()
73+
# Voigt notation
74+
self.results['stress'] = stress.flat[[0, 4, 8, 5, 2, 1]]
75+
else:
76+
raise PropertyNotImplementedError

dpdata/bond_order_system.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,13 @@
11
#%%
22
# Bond Order System
3-
from dpdata.system import System, LabeledSystem, check_System, load_format
3+
import numpy as np
4+
from dpdata.system import System, LabeledSystem, load_format, DataType, Axis
45
import dpdata.rdkit.utils
56
from dpdata.rdkit.sanitize import Sanitizer, SanitizeError
67
from copy import deepcopy
78
from rdkit.Chem import Conformer
89
# import dpdata.rdkit.mol2
910

10-
def check_BondOrderSystem(data):
11-
check_System(data)
12-
assert ('bonds' in data.keys())
1311

1412
class BondOrderSystem(System):
1513
'''
@@ -23,6 +21,11 @@ class BondOrderSystem(System):
2321
1 - single bond, 2 - double bond, 3 - triple bond, 1.5 - aromatic bond
2422
- `d_example['formal_charges']` : a numpy array of size 5 x 1
2523
'''
24+
DTYPES = System.DTYPES + (
25+
DataType("bonds", np.ndarray, (Axis.NBONDS, 3)),
26+
DataType("formal_charges", np.ndarray, (Axis.NATOMS,)),
27+
)
28+
2629
def __init__(self,
2730
file_name = None,
2831
fmt = 'auto',
@@ -86,6 +89,7 @@ def __init__(self,
8689

8790
if type_map:
8891
self.apply_type_map(type_map)
92+
self.check_data()
8993

9094
def from_fmt_obj(self, fmtobj, file_name, **kwargs):
9195
mol = fmtobj.from_bond_order_system(file_name, **kwargs)
@@ -104,9 +108,6 @@ def to_fmt_obj(self, fmtobj, *args, **kwargs):
104108
self.rdkit_mol.AddConformer(conf, assignId=True)
105109
return fmtobj.to_bond_order_system(self.data, self.rdkit_mol, *args, **kwargs)
106110

107-
def __repr__(self):
108-
return self.__str__()
109-
110111
def __str__(self):
111112
'''
112113
A brief summary of the system

dpdata/cp2k/output.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import re
44
from collections import OrderedDict
55

6-
from scipy.constants.constants import R
6+
from scipy.constants import R
77
from .cell import cell_to_low_triangle
88
from ..unit import EnergyConversion, LengthConversion, ForceConversion, PressureConversion
99

@@ -268,7 +268,7 @@ def handle_single_xyz_frame(self, lines):
268268
#info_dict['atom_types'] = np.asarray(atom_types_list)
269269
info_dict['coords'] = np.asarray([coords_list]).astype('float32')
270270
info_dict['energies'] = np.array([energy]).astype('float32')
271-
info_dict['orig']=[0,0,0]
271+
info_dict['orig'] = np.zeros(3)
272272
return info_dict
273273

274274
#%%

0 commit comments

Comments
 (0)