Skip to content

Commit 1d1084d

Browse files
Merge pull request #216 from amcadmus/master
Merge recent development on devel into master
2 parents ea32d45 + f1859eb commit 1d1084d

File tree

139 files changed

+9257
-256
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

139 files changed

+9257
-256
lines changed

.github/workflows/mirror_gitee.yml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
name: Mirror to Gitee Repo
2+
3+
on: [ push, delete, create ]
4+
5+
# Ensures that only one mirror task will run at a time.
6+
concurrency:
7+
group: git-mirror
8+
9+
jobs:
10+
git-mirror:
11+
runs-on: ubuntu-latest
12+
steps:
13+
- uses: wearerequired/git-mirror-action@v1
14+
env:
15+
ORGANIZATION: deepmodeling
16+
SSH_PRIVATE_KEY: ${{ secrets.SYNC_GITEE_PRIVATE_KEY }}
17+
with:
18+
source-repo: "https://github.com/deepmodeling/dpdata.git"
19+
destination-repo: "[email protected]:deepmodeling/dpdata.git"

.github/workflows/test.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ jobs:
2323
- name: Install rdkit
2424
run: conda create -c conda-forge -n my-rdkit-env python=${{ matrix.python-version }} rdkit openbabel;
2525
- name: Install dependencies
26-
run: source $CONDA/bin/activate my-rdkit-env && pip install .[amber] coverage codecov
26+
run: source $CONDA/bin/activate my-rdkit-env && pip install .[amber,ase,pymatgen] coverage codecov
2727
- name: Test
2828
run: source $CONDA/bin/activate my-rdkit-env && cd tests && coverage run --source=../dpdata -m unittest && cd .. && coverage combine tests/.coverage && coverage report
2929
- name: Run codecov

README.md

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
**dpdata** is a python package for manipulating DeePMD-kit, VASP, LAMMPS data formats.
1+
**dpdata** is a python package for manipulating data formats of software in computational science, including DeePMD-kit, VASP, LAMMPS, GROMACS, Gaussian.
22
dpdata only works with python 3.x.
33

44

@@ -80,6 +80,8 @@ The `System` or `LabeledSystem` can be constructed from the following file forma
8080
| Amber | multi | True | True | LabeledSystem | 'amber/md' |
8181
| Amber/sqm | sqm.out | False | False | System | 'sqm/out' |
8282
| Gromacs | gro | True | False | System | 'gromacs/gro' |
83+
| ABACUS | STRU | False | True | LabeledSystem | 'abacus/scf' |
84+
| ABACUS | cif | True | True | LabeledSystem | 'abacus/md' |
8385

8486

8587
The class `dpdata.MultiSystems` can read data from a directory that may contain many files of different systems, or from a single xyz file that contains different systems.
@@ -116,7 +118,7 @@ xyz_multi_systems.to_deepmd_raw('./my_deepmd_data/')
116118
```
117119

118120
You may also use the following code to parse multi-system data:
119-
```
121+
```python
120122
from dpdata import LabeledSystem,MultiSystems
121123
from glob import glob
122124
"""
@@ -255,7 +257,7 @@ If a valence of 3 is detected on carbon, the formal charge will be assigned to -
255257

256258
# Plugins
257259

258-
One can follow [a simple example](plugin_example/) to add their own format by creating and installing plugins. It's crirical to add the [Format](dpdata/format.py) class to `entry_points['dpdata.plugins']` in `setup.py`:
260+
One can follow [a simple example](plugin_example/) to add their own format by creating and installing plugins. It's critical to add the [Format](dpdata/format.py) class to `entry_points['dpdata.plugins']` in `setup.py`:
259261
```py
260262
entry_points={
261263
'dpdata.plugins': [

docs/conf.py

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,14 @@
1414
#
1515
import os
1616
import sys
17+
from datetime import date
1718
sys.path.insert(0, os.path.abspath('..'))
1819

1920

2021
# -- Project information -----------------------------------------------------
2122

2223
project = 'dpdata'
23-
copyright = '2019, Han Wang'
24+
copyright = '2019-%d, Deep Modeling ' % date.today().year
2425
author = 'Han Wang'
2526

2627
# The short X.Y version
@@ -39,11 +40,12 @@
3940
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
4041
# ones.
4142
extensions = [
42-
'sphinxcontrib.napoleon',
43+
'sphinx_rtd_theme',
4344
'sphinx.ext.mathjax',
4445
'sphinx.ext.viewcode',
45-
'sphinx.ext.githubpages',
46-
'm2r',
46+
'sphinx.ext.intersphinx',
47+
'numpydoc',
48+
'm2r2',
4749
]
4850

4951
# Add any paths that contain templates here, relative to this directory.
@@ -79,7 +81,7 @@
7981
# The theme to use for HTML and HTML Help pages. See the documentation for
8082
# a list of builtin themes.
8183
#
82-
html_theme = 'alabaster'
84+
html_theme = 'sphinx_rtd_theme'
8385

8486
# Theme options are theme-specific and customize the look and feel of a theme
8587
# further. For a list of options available for each theme, see the
@@ -90,7 +92,7 @@
9092
# Add any paths that contain custom static files (such as style sheets) here,
9193
# relative to this directory. They are copied after the builtin static files,
9294
# so a file named "default.css" will overwrite the builtin "default.css".
93-
html_static_path = ['_static']
95+
#html_static_path = ['_static']
9496

9597
# Custom sidebar templates, must be a dictionary that maps document names
9698
# to template names.
@@ -161,3 +163,18 @@
161163

162164

163165
# -- Extension configuration -------------------------------------------------
166+
def run_apidoc(_):
    """Run ``sphinx-apidoc`` over the ``dpdata`` package.

    The generated files are written to the ``api`` directory next to this
    configuration file, with ``api`` as the table-of-contents file name.
    The single positional argument is ignored.
    """
    from sphinx.ext.apidoc import main

    docs_dir = os.path.dirname(__file__)
    # Make the repository root importable.
    sys.path.append(os.path.join(docs_dir, '..'))
    cur_dir = os.path.abspath(docs_dir)
    apidoc_args = [
        '-M',
        '--tocfile', 'api',
        '-H', 'API documentation',
        '-o', os.path.join(cur_dir, "api"),
        os.path.join(cur_dir, "..", "dpdata"),
        '--force',
    ]
    main(apidoc_args)
172+
173+
def setup(app):
    """Register ``run_apidoc`` as a handler for the ``builder-inited`` event.

    Parameters
    ----------
    app
        Object exposing ``connect(event_name, callback)``.
    """
    app.connect('builder-inited', run_apidoc)
175+
176+
177+
# External object inventories used by sphinx.ext.intersphinx to resolve
# cross-references.  The NumPy documentation is hosted at numpy.org; the old
# docs.scipy.org/doc/numpy/ location is no longer its canonical home.
intersphinx_mapping = {
    "numpy": ("https://numpy.org/doc/stable/", None),
    "python": ("https://docs.python.org/", None),
}

docs/index.rst

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,22 +10,11 @@ Welcome to dpdata's documentation!
1010
:maxdepth: 2
1111
:caption: Contents:
1212

13+
api/api
1314

1415
.. mdinclude:: ../README.md
1516

1617

17-
API documentation
18-
=================
19-
20-
.. automodule:: dpdata
21-
22-
.. autoclass:: System
23-
:members: __init__, __getitem__, get_nframes, get_natoms, sub_system, append, apply_pbc, to_lammps_lmp, to_vasp_poscar
24-
25-
.. autoclass:: LabeledSystem
26-
:members: __init__, sub_system, to_deepmd_raw, to_deepmd_npy
27-
28-
2918
Indices and tables
3019
==================
3120

docs/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
.[docs]

dpdata/abacus/md.py

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
import os,sys
2+
import numpy as np
3+
from .scf import ry2ev, kbar2evperang3, get_block, get_geometry_in, get_cell, get_coords
4+
5+
# Read in geometries from an ABACUS MD trajectory.
6+
# The atomic coordinates are read in from generated files in OUT.XXXX.
7+
# Energies, forces
8+
# IMPORTANT: the program defaultly takes STRU input file as standard cell information,
9+
# therefore the direct and cartesan coordinates read could be different from the ones in
10+
# the output cif files!!!
11+
# It is highly recommended to use ORTHOGONAL coordinates in the STRU file if you wish to get
12+
# same coordinates in both dpdata and output cif files.
13+
14+
def get_path_out(fname, inlines):
    """Return the ``OUT.<suffix>/`` directory of an ABACUS run.

    This function is different from the same-name function in scf.py:
    it returns the OUT.XXXX base directory itself.

    Parameters
    ----------
    fname : str
        Base directory of the calculation.
    inlines : list of str
        Lines of the INPUT file.

    Returns
    -------
    str
        ``<fname>/OUT.<suffix>/``, where ``<suffix>`` is the value on the
        first line whose first token is ``suffix``.  ``ABACUS`` is used when
        no such line exists or the keyword has no value.
    """
    suffix = "ABACUS"
    for line in inlines:
        tokens = line.split()
        # Require a value after the keyword so a bare "suffix" line cannot
        # raise IndexError.
        if len(tokens) >= 2 and tokens[0] == "suffix":
            suffix = tokens[1]
            break
    return os.path.join(fname, "OUT.%s/" % suffix)
24+
25+
def get_coord_dump_freq(inlines):
    """Return the value of the ``md_dumpmdfred`` setting in INPUT.

    Parameters
    ----------
    inlines : list of str
        Lines of the INPUT file.

    Returns
    -------
    int
        The integer following the first line whose first token is
        ``md_dumpmdfred``; 1 when the keyword is absent or has no value.
    """
    for line in inlines:
        tokens = line.split()
        # Require a value after the keyword so a bare keyword line cannot
        # raise IndexError.
        if len(tokens) >= 2 and tokens[0] == "md_dumpmdfred":
            return int(tokens[1])
    return 1
30+
31+
# set up a cell according to cell info in cif file.
32+
# maybe useful later
33+
'''
34+
def setup_cell(a, b, c, alpha, beta, gamma):
35+
cell = np.zeros(3, 3)
36+
cell[0, 0] = a
37+
cell[1, 0] = b*np.cos(gamma/180*np.pi)
38+
cell[1, 1] = b*np.sin(gamma/180*np.pi)
39+
cell[2, 0] = c*np.cos(beta/180*np.pi)
40+
cell[2, 1] = c*(b*np.cos(alpha/180*np.pi) - cell[1, 0]*np.cos(beta/180*np.pi))/cell[1, 1]
41+
cell[2, 2] = np.sqrt(c**2 - cell[2, 0]**2 - cell[2, 1]**2)
42+
return cell
43+
'''
44+
45+
def get_single_coord_from_cif(pos_file, atom_names, natoms, cell):
    """Read the atomic positions of one frame from a cif file.

    Parameters
    ----------
    pos_file : str
        Path of the cif file.
    atom_names : list of str
        Element labels, in the order the atoms appear in the file.
    natoms : list of int
        Number of atoms of each element; same length as ``atom_names``.
    cell : array-like
        Matrix the fractional coordinates are right-multiplied by.

    Returns
    -------
    numpy.ndarray
        ``(sum(natoms), 3)`` array of coordinates after multiplication
        by ``cell``.
    """
    assert(len(atom_names) == len(natoms))
    total_natoms = sum(natoms)
    coord = np.zeros([total_natoms, 3])

    with open(pos_file, "r") as fp:
        lines = fp.read().split("\n")

    # The six cell parameters are only parsed to check that the file defines
    # all of them; the cell actually used is the `cell` argument.
    cell_params = {
        "_cell_length_a": 0,
        "_cell_length_b": 0,
        "_cell_length_c": 0,
        "_cell_angle_alpha": 0,
        "_cell_angle_beta": 0,
        "_cell_angle_gamma": 0,
    }
    for line in lines:
        for key in cell_params:
            if key in line:
                cell_params[key] = float(line.split()[1])
    assert(all(value > 0 for value in cell_params.values()))

    # NOTE(review): get_block presumably returns the total_natoms lines that
    # follow the "_atom_site_fract_z" header -- confirm against scf.get_block.
    coord_lines = get_block(lines=lines, keyword="_atom_site_fract_z", skip=0, nlines = total_natoms)

    row = 0
    for it, name in enumerate(atom_names):
        for _ in range(natoms[it]):
            fields = coord_lines[row].split()
            # Atoms must be listed grouped by element, in atom_names order.
            assert(fields[0] == name)
            coord[row, :] = [float(fields[1]), float(fields[2]), float(fields[3])]
            row += 1

    # important! Coordinates are converted to Cartesian coordinate.
    return np.matmul(coord, cell)
87+
88+
89+
def get_coords_from_cif(ndump, dump_freq, atom_names, natoms, types, path_out, cell):
    """Read the coordinates of every dumped frame from the cif files.

    Frame 0 is read from ``STRU_READIN_ADJUST.cif``; frame ``i`` (``i >= 1``)
    is read from ``md_pos_<i*dump_freq>.cif``.  All files are looked up in
    ``path_out``.

    Parameters
    ----------
    ndump : int
        Number of frames to read.
    dump_freq : int
        Step multiplier used to build the ``md_pos_`` file names.
    atom_names, natoms, cell
        Passed through to ``get_single_coord_from_cif``.
    types
        Not used by this function.
    path_out : str
        Directory containing the cif files.

    Returns
    -------
    numpy.ndarray
        ``(ndump, sum(natoms), 3)`` array of coordinates.
    """
    coords = np.zeros([ndump, sum(natoms), 3])
    # frame 0 file is different from any other frames
    frame_files = ["STRU_READIN_ADJUST.cif"]
    frame_files.extend("md_pos_%d.cif" % (step * dump_freq) for step in range(1, ndump))
    for frame_idx, cif_name in enumerate(frame_files):
        cif_path = os.path.join(path_out, cif_name)
        coords[frame_idx] = get_single_coord_from_cif(cif_path, atom_names, natoms, cell)
    return coords
101+
102+
def get_energy_force_stress(outlines, inlines, dump_freq, ndump, natoms, atom_names):
    """Collect energies, forces and (optionally) stresses from an MD log.

    Parameters
    ----------
    outlines : list of str
        Lines of the MD log file.
    inlines : list of str
        Lines of the INPUT file.  Stress is only collected when one of them
        reads ``stress 1``.
    dump_freq : int
        Only every ``dump_freq``-th record found in the log is stored.
    ndump : int
        Number of frames to allocate in the returned arrays.
    natoms : list of int
        Number of atoms of each element.
    atom_names : list of str
        Not used by this function.

    Returns
    -------
    energy : numpy.ndarray
        Shape ``(ndump,)``; the second-to-last token of each
        ``final etot is`` line.
    force : numpy.ndarray
        Shape ``(ndump, sum(natoms), 3)``; read from the
        ``TOTAL-FORCE (eV/Angstrom)`` blocks.
    stress : numpy.ndarray or None
        Shape ``(ndump, 3, 3)``; the ``TOTAL-STRESS (KBAR)`` blocks multiplied
        by ``kbar2evperang3``.  ``None`` when INPUT does not enable stress.
    """
    total_natoms = sum(natoms)

    stress = None
    for line in inlines:
        tokens = line.split()
        # Tokenise once and require a value, so a bare "stress" line cannot
        # raise IndexError.
        if len(tokens) >= 2 and tokens[0] == "stress" and tokens[1] == "1":
            stress = np.zeros([ndump, 3, 3])
            break
    if stress is None:
        print("The ABACUS program has no stress output. Stress will not be read.")

    nenergy = 0
    nforce = 0
    nstress = 0
    energy = np.zeros(ndump)
    force = np.zeros([ndump, total_natoms, 3])

    for line_idx, line in enumerate(outlines):
        if "final etot is" in line:
            if nenergy % dump_freq == 0:
                energy[nenergy // dump_freq] = float(line.split()[-2])
            nenergy += 1
        if "TOTAL-FORCE (eV/Angstrom)" in line:
            # The per-atom rows start 5 lines below the block header.
            for iatom in range(total_natoms):
                force_line = outlines[line_idx + 5 + iatom]
                atom_force = [float(i) for i in force_line.split()[1:]]
                assert(len(atom_force) == 3)
                if nforce % dump_freq == 0:
                    force[nforce // dump_freq, iatom] = atom_force
            nforce += 1
            # Every force block must be preceded by its energy record.
            assert(nforce == nenergy)
        # Skip stress blocks when stress was not requested: there is no array
        # to store them in (previously this raised TypeError on None).
        if stress is not None and "TOTAL-STRESS (KBAR)" in line:
            # The three tensor rows start 4 lines below the block header.
            for idx in range(3):
                stress_line = outlines[line_idx + 4 + idx]
                single_stress = [float(i) for i in stress_line.split()]
                if len(single_stress) != 3:
                    print(single_stress)
                assert(len(single_stress) == 3)
                if nstress % dump_freq == 0:
                    stress[nstress // dump_freq, idx] = single_stress
            nstress += 1
            assert(nstress == nforce)

    if stress is not None:
        stress *= kbar2evperang3
    return energy, force, stress
147+
148+
149+
def get_frame(fname):
    """Build a data dictionary from an ABACUS MD run directory.

    Parameters
    ----------
    fname : str
        Base directory containing the INPUT file.

    Returns
    -------
    dict
        Keys ``atom_names``, ``atom_numbs``, ``atom_types``, ``cells``,
        ``coords``, ``energies``, ``forces`` and ``orig``.  ``virials``
        (stress times ``det(cell)``) is present only when stress was read.

    Raises
    ------
    RuntimeError
        If ``fname`` is not a string.
    """
    if not isinstance(fname, str):
        raise RuntimeError('invalid input')
    # if the input parameter is only one string, it is assumed that it is the
    # base directory containing INPUT file;
    path_in = os.path.join(fname, "INPUT")
    with open(path_in, 'r') as fp:
        inlines = fp.read().split('\n')
    geometry_path_in = get_geometry_in(fname, inlines)  # base dir of STRU
    path_out = get_path_out(fname, inlines)

    with open(geometry_path_in, 'r') as fp:
        geometry_inlines = fp.read().split('\n')
    celldm, cell = get_cell(geometry_inlines)
    # The coordinates from STRU are discarded; the per-frame coordinates are
    # read from the dumped cif files below.
    atom_names, natoms, types, _ = get_coords(celldm, cell, geometry_inlines, inlines)
    dump_freq = get_coord_dump_freq(inlines = inlines)

    # Number of dumped geometry files.  Counted in Python rather than through
    # an "ls | grep | wc" shell pipeline, so paths with spaces or shell
    # metacharacters are handled.  Dot-files are skipped, as plain `ls` did.
    ndump = sum(
        1
        for entry in os.listdir(path_out)
        if 'md_pos_' in entry and not entry.startswith('.')
    )
    coords = get_coords_from_cif(ndump, dump_freq, atom_names, natoms, types, path_out, cell)

    with open(os.path.join(path_out, "running_md.log"), 'r') as fp:
        outlines = fp.read().split('\n')
    energy, force, stress = get_energy_force_stress(outlines, inlines, dump_freq, ndump, natoms, atom_names)

    data = {}
    data['atom_names'] = atom_names
    data['atom_numbs'] = natoms
    data['atom_types'] = types
    # Every frame uses the same cell: one broadcast assignment fills them all.
    data['cells'] = np.zeros([ndump, 3, 3])
    data['cells'][:, :, :] = cell
    data['coords'] = coords
    data['energies'] = energy
    data['forces'] = force
    if stress is not None:
        # Stress is scaled by det(cell) before being stored as the virial.
        stress *= np.linalg.det(cell)
        data['virials'] = stress
    data['orig'] = np.zeros(3)

    return data

dpdata/abacus/scf.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
11
import os,sys
22
import numpy as np
3+
from ..unit import EnergyConversion, PressureConversion, LengthConversion
34

4-
bohr2ang = 0.5291770
5-
ry2ev = 13.605698
6-
kbar2evperang3 = 1e3 / 1.6021892e6
7-
# The consts are cited from $ABACUS_ROOT/source/src_global/constant.h
8-
5+
bohr2ang = LengthConversion("bohr", "angstrom").value()
6+
ry2ev = EnergyConversion("rydberg", "eV").value()
7+
kbar2evperang3 = PressureConversion("kbar", "eV/angstrom^3").value()
98

109
def get_block (lines, keyword, skip = 0, nlines = None):
1110
ret = []
@@ -175,6 +174,6 @@ def get_frame (fname):
175174
# print("virial = ", data['virials'])
176175
return data
177176

178-
if __name__ == "__main__":
179-
path = "/home/lrx/work/12_ABACUS_dpgen_interface/dpdata/dpdata/tests/abacus.scf"
180-
data = get_frame(path)
177+
#if __name__ == "__main__":
178+
# path = "/home/lrx/work/12_ABACUS_dpgen_interface/dpdata/dpdata/tests/abacus.scf"
179+
# data = get_frame(path)

dpdata/amber/md.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,11 @@
33
from scipy.io import netcdf
44
import numpy as np
55
from dpdata.amber.mask import pick_by_amber_mask
6+
from dpdata.unit import EnergyConversion
7+
from ..periodic_table import ELEMENTS
68

7-
kcalmol2eV= 0.04336410390059322
8-
symbols = ['X', 'H', 'He', 'Li', 'Be', 'B', 'C', 'N', 'O', 'F', 'Ne', 'Na', 'Mg', 'Al', 'Si', 'P', 'S', 'Cl', 'Ar', 'K', 'Ca', 'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn', 'Ga', 'Ge', 'As', 'Se', 'Br', 'Kr', 'Rb', 'Sr', 'Y', 'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'In', 'Sn', 'Sb', 'Te', 'I', 'Xe', 'Cs', 'Ba', 'La', 'Ce', 'Pr', 'Nd', 'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb', 'Lu', 'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg', 'Tl', 'Pb', 'Bi', 'Po', 'At', 'Rn', 'Fr', 'Ra', 'Ac', 'Th', 'Pa', 'U', 'Np', 'Pu', 'Am', 'Cm', 'Bk', 'Cf', 'Es', 'Fm', 'Md', 'No', 'Lr', 'Rf', 'Db', 'Sg', 'Bh', 'Hs', 'Mt', 'Ds', 'Rg', 'Cn', 'Nh', 'Fl', 'Mc', 'Lv', 'Ts', 'Og']
9+
kcalmol2eV = EnergyConversion("kcal_mol", "eV").value()
10+
symbols = ['X'] + ELEMENTS
911

1012
energy_convert = kcalmol2eV
1113
force_convert = energy_convert

0 commit comments

Comments
 (0)