Skip to content

Commit fda345f

Browse files
committed
merge
2 parents 3cfad49 + 707f595 commit fda345f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

68 files changed

+10505
-277
lines changed

.travis.yml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,12 @@ python:
55
- "3.6"
66
- "3.6-dev" # 3.6 development branch
77
# command to install dependencies
8+
before_install:
9+
- pip install coverage codecov
810
install:
911
- pip install .
1012
# command to run tests
1113
script:
12-
- cd tests && python -m unittest
14+
- cd tests && coverage run --source=../dpdata -m unittest && cd .. && coverage combine tests/.coverage && coverage report
15+
after_success:
16+
- codecov

README.md

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,18 @@ The `System` or `LabeledSystem` can be constructed from the following file forma
5858
| vasp | xml | True | True | LabeledSystem | 'vasp/xml' |
5959
| lammps | lmp | False | False | System | 'lammps/lmp' |
6060
| lammps | dump | True | False | System | 'lammps/dump' |
61+
| deepmd | raw | True | False | System | 'deepmd/raw' |
62+
| deepmd | npy | True | False | System | 'deepmd/npy' |
6163
| deepmd | raw | True | True | LabeledSystem | 'deepmd/raw' |
64+
| deepmd | npy | True | True | LabeledSystem | 'deepmd/npy' |
6265
| gaussian| log | False | True | LabeledSystem | 'gaussian/log'|
63-
64-
66+
| gaussian| log | True | True | LabeledSystem | 'gaussian/md' |
67+
| siesta| output | False | True | LabeledSystem | 'siesta/output'|
68+
| siesta| aimd_output | True | True | LabeledSystem | 'siesta/aimd_output' |
69+
| cp2k | output | False | True | LabeledSystem | 'cp2k/output' |
70+
| QE | log | False | True | LabeledSystem | 'qe/pw/scf' |
71+
| QE | log | True | False | System | 'qe/cp/traj' |
72+
| QE | log | True | True | LabeledSystem | 'qe/cp/traj' |
6573

6674
## Access data
6775
The properties stored in `System` and `LabeledSystem` can be accessed with the `[]` operator by supplying the key of the property, for example

dpdata/cp2k/output.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
import numpy as np
2+
3+
4+
def get_frames(fname):
    """Read cell, coordinates, energy and forces of one frame from a CP2K output file.

    The file is scanned line by line for marker substrings; values are taken
    from fixed whitespace-separated columns of the matching lines.

    Parameters
    ----------
    fname : str
        Path of the output file to parse.

    Returns
    -------
    atom_names : list
        Distinct element symbols, in order of first appearance.
    atom_numbs : list of int
        Number of atoms for each entry of ``atom_names``.
    atom_types : numpy.ndarray
        Index into ``atom_names`` for every atom, in file order.
    cell : numpy.ndarray
        Cell vectors as read from the file, with a leading frame axis of length 1.
    coord : numpy.ndarray
        Atomic coordinates as read from the file, with a leading frame axis.
    energy : numpy.ndarray
        One-element array holding the parsed energy multiplied by ``eV``.
    force : numpy.ndarray
        Parsed forces multiplied by ``eV / angstrom``, with a leading frame axis.

    Raises
    ------
    AssertionError
        If no coordinates, no energy or no forces were found in the file.
    """
    # Conversion factors applied to the parsed energy and forces.
    # NOTE(review): presumably Hartree -> eV and Bohr -> Angstrom; confirm.
    eV = 2.72113838565563E+01
    angstrom = 5.29177208590000E-01

    atom_symbol_list = []
    cell = []
    coord = []
    force = []
    # Initialised so that a missing energy line is reported by the explicit
    # check below instead of surfacing as an UnboundLocalError.
    energy = None
    coord_flag = False
    force_flag = False
    coord_idx = -1
    force_idx = -1

    # NOTE(review): the markers below are matched as exact substrings, so their
    # internal spacing matters; confirm them against real CP2K output.
    with open(fname) as fp:
        for idx, ii in enumerate(fp):
            if 'CELL| Vector' in ii:
                cell.append(ii.split()[4:7])
            if 'Atom Kind Element' in ii:
                coord_flag = True
                coord_idx = idx
            # Coordinate rows start two lines after the header and run until
            # the first empty line.
            if coord_flag and idx > coord_idx + 1:
                if ii == '\n':
                    coord_flag = False
                else:
                    fields = ii.split()
                    coord.append(fields[4:7])
                    atom_symbol_list.append(fields[2])
            if 'ENERGY|' in ii:
                energy = ii.split()[8]
            if ' Atom Kind ' in ii:
                force_flag = True
                force_idx = idx
            # Force rows start right after the header and run until the
            # summary line.
            if force_flag and idx > force_idx:
                if 'SUM OF ATOMIC FORCES' in ii:
                    force_flag = False
                else:
                    force.append(ii.split()[3:6])

    # Raised explicitly (rather than via ``assert``) so the checks survive
    # ``python -O`` while keeping the exception type callers may rely on.
    if not coord:
        raise AssertionError("cannot find coords")
    if not energy:
        raise AssertionError("cannot find energies")
    if not force:
        raise AssertionError("cannot find forces")

    # Convert to float arrays and add an extra leading dimension for nframes.
    # The builtin ``float`` is used because the ``np.float`` alias no longer
    # exists in current NumPy releases.
    cell = np.array(cell).astype(float)[np.newaxis, :, :]
    coord = np.array(coord).astype(float)[np.newaxis, :, :]
    force = np.array(force).astype(float)[np.newaxis, :, :] * eV / angstrom
    energy = np.array(float(energy) * eV)[np.newaxis]

    atom_symbol_list = np.array(atom_symbol_list)
    _, symbol_idx = np.unique(atom_symbol_list, return_index=True)
    # Sorting the first-occurrence indices preserves the atom_name order of
    # the file instead of the alphabetical order np.unique returns.
    atom_names = atom_symbol_list[np.sort(symbol_idx)]
    type_of_name = {name: index for index, name in enumerate(atom_names)}
    atom_types = [type_of_name[symbol] for symbol in atom_symbol_list]
    atom_numbs = [atom_types.count(index) for index in range(len(atom_names))]
    atom_types = np.array(atom_types)

    return list(atom_names), atom_numbs, atom_types, cell, coord, energy, force
75+
76+

dpdata/deepmd/comp.py

Lines changed: 40 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,26 @@
22
import numpy as np
33
from .raw import load_type
44

5+
def _cond_load_data(fname) :
6+
tmp = None
7+
if os.path.isfile(fname) :
8+
tmp = np.load(fname)
9+
return tmp
10+
511
def _load_set(folder) :
612
cells = np.load(os.path.join(folder, 'box.npy'))
713
coords = np.load(os.path.join(folder, 'coord.npy'))
8-
eners = np.load(os.path.join(folder, 'energy.npy'))
9-
forces = np.load(os.path.join(folder, 'force.npy'))
10-
virs = None
11-
if os.path.isfile(os.path.join(folder, 'virial.npy')) :
12-
virs = np.load(os.path.join(folder, 'virial.npy'))
14+
eners = _cond_load_data(os.path.join(folder, 'energy.npy'))
15+
forces = _cond_load_data(os.path.join(folder, 'force.npy'))
16+
virs = _cond_load_data(os.path.join(folder, 'virial.npy'))
1317
return cells, coords, eners, forces, virs
1418

15-
def to_system_data(folder, type_map = None) :
19+
def to_system_data(folder,
20+
type_map = None,
21+
labels = True) :
22+
# data is empty
1623
data = load_type(folder, type_map = type_map)
1724
data['orig'] = np.zeros([3])
18-
data['virials'] = []
1925
sets = glob.glob(os.path.join(folder, 'set.*'))
2026
all_cells = []
2127
all_coords = []
@@ -27,14 +33,19 @@ def to_system_data(folder, type_map = None) :
2733
nframes = np.reshape(cells, [-1,3,3]).shape[0]
2834
all_cells.append(np.reshape(cells, [nframes,3,3]))
2935
all_coords.append(np.reshape(coords, [nframes,-1,3]))
30-
all_eners.append(np.reshape(eners, [nframes]))
31-
all_forces.append(np.reshape(forces, [nframes,-1,3]))
32-
if virs is not None and len(virs) > 0:
33-
virs = all_virs.append(np.reshape(virs, [nframes,3,3]))
36+
if labels:
37+
if eners is not None and len(eners) > 0:
38+
all_eners.append(np.reshape(eners, [nframes]))
39+
if forces is not None and len(forces) > 0:
40+
all_forces.append(np.reshape(forces, [nframes,-1,3]))
41+
if virs is not None and len(virs) > 0:
42+
all_virs.append(np.reshape(virs, [nframes,3,3]))
3443
data['cells'] = np.concatenate(all_cells, axis = 0)
35-
data['coords'] = np.concatenate(all_coords, axis = 0)
36-
data['energies'] = np.concatenate(all_eners, axis = 0)
37-
data['forces'] = np.concatenate(all_forces, axis = 0)
44+
data['coords'] = np.concatenate(all_coords, axis = 0)
45+
if len(all_eners) > 0 :
46+
data['energies'] = np.concatenate(all_eners, axis = 0)
47+
if len(all_forces) > 0 :
48+
data['forces'] = np.concatenate(all_forces, axis = 0)
3849
if len(all_virs) > 0:
3950
data['virials'] = np.concatenate(all_virs, axis = 0)
4051
return data
@@ -55,16 +66,20 @@ def dump(folder,
5566
raise RuntimeError('found ' + str(sets) + ' in ' + folder + 'not a clean deepmd raw dir. please firstly clean set.* then try compress')
5667
# dump raw
5768
np.savetxt(os.path.join(folder, 'type.raw'), data['atom_types'], fmt = '%d')
69+
np.savetxt(os.path.join(folder, 'type_map.raw'), data['atom_names'], fmt = '%s')
5870
# reshape frame properties and convert prec
5971
nframes = data['cells'].shape[0]
6072
cells = np.reshape(data['cells'], [nframes, 9]).astype(comp_prec)
6173
coords = np.reshape(data['coords'], [nframes, -1]).astype(comp_prec)
62-
eners = np.reshape(data['energies'], [nframes ]).astype(comp_prec)
63-
forces = np.reshape(data['forces'], [nframes, -1]).astype(comp_prec)
64-
if len(data['virials']) > 0 :
65-
virials = np.reshape(data['virials'], [nframes, 9]).astype(comp_prec)
66-
else :
67-
virials = []
74+
eners = None
75+
forces = None
76+
virials = None
77+
if 'energies' in data:
78+
eners = np.reshape(data['energies'], [nframes ]).astype(comp_prec)
79+
if 'forces' in data:
80+
forces = np.reshape(data['forces'], [nframes, -1]).astype(comp_prec)
81+
if 'virials' in data :
82+
virials = np.reshape(data['virials'], [nframes, 9]).astype(comp_prec)
6883
if 'atom_pref' in data:
6984
atom_pref = np.reshape(data['atom_pref'], [nframes, -1]).astype(comp_prec)
7085
# dump frame properties: cell, coord, energy, force and virial
@@ -78,9 +93,11 @@ def dump(folder,
7893
os.makedirs(set_folder)
7994
np.save(os.path.join(set_folder, 'box'), cells [set_stt:set_end])
8095
np.save(os.path.join(set_folder, 'coord'), coords [set_stt:set_end])
81-
np.save(os.path.join(set_folder, 'energy'), eners [set_stt:set_end])
82-
np.save(os.path.join(set_folder, 'force'), forces [set_stt:set_end])
83-
if len(virials) > 0:
96+
if eners is not None:
97+
np.save(os.path.join(set_folder, 'energy'), eners [set_stt:set_end])
98+
if forces is not None:
99+
np.save(os.path.join(set_folder, 'force'), forces [set_stt:set_end])
100+
if virials is not None:
84101
np.save(os.path.join(set_folder, 'virial'), virials[set_stt:set_end])
85102
if 'atom_pref' in data:
86103
np.save(os.path.join(set_folder, "atom_pref"), atom_pref[set_stt:set_end])

dpdata/deepmd/raw.py

Lines changed: 33 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -4,40 +4,51 @@
44
def load_type(folder, type_map = None) :
55
data = {}
66
data['atom_types'] \
7-
= np.loadtxt(os.path.join(folder, 'type.raw')).astype(int)
7+
= np.loadtxt(os.path.join(folder, 'type.raw'), ndmin=1).astype(int)
88
ntypes = np.max(data['atom_types']) + 1
99
data['atom_numbs'] = []
1010
for ii in range (ntypes) :
1111
data['atom_numbs'].append(np.count_nonzero(data['atom_types'] == ii))
1212
data['atom_names'] = []
13-
if type_map == None :
13+
# if find type_map.raw, use it
14+
if os.path.isfile(os.path.join(folder, 'type_map.raw')) :
15+
with open(os.path.join(folder, 'type_map.raw')) as fp:
16+
my_type_map = fp.read().split()
17+
# else try to use arg type_map
18+
elif type_map is not None:
19+
my_type_map = type_map
20+
# in the last case, make artificial atom names
21+
else:
22+
my_type_map = []
1423
for ii in range(ntypes) :
15-
data['atom_names'].append('Type_%d' % ii)
16-
else :
17-
assert(len(type_map) >= len(data['atom_numbs']))
18-
for ii in range(len(data['atom_numbs'])) :
19-
data['atom_names'].append(type_map[ii])
24+
my_type_map.append('Type_%d' % ii)
25+
assert(len(my_type_map) >= len(data['atom_numbs']))
26+
for ii in range(len(data['atom_numbs'])) :
27+
data['atom_names'].append(my_type_map[ii])
28+
2029
return data
2130

2231

23-
def to_system_data(folder, type_map = None) :
32+
def to_system_data(folder, type_map = None, labels = True) :
2433
if os.path.isdir(folder) :
2534
data = load_type(folder, type_map = type_map)
2635
data['orig'] = np.zeros([3])
27-
data['virials'] = []
2836
data['cells'] = np.loadtxt(os.path.join(folder, 'box.raw'))
2937
data['coords'] = np.loadtxt(os.path.join(folder, 'coord.raw'))
30-
data['energies'] = np.loadtxt(os.path.join(folder, 'energy.raw'))
31-
data['forces'] = np.loadtxt(os.path.join(folder, 'force.raw'))
3238
data['cells'] = np.reshape(data['cells'], [-1, 3, 3])
3339
nframes = data['cells'].shape[0]
3440
data['cells'] = np.reshape(data['cells'], [nframes, 3, 3])
3541
data['coords'] = np.reshape(data['coords'], [nframes, -1, 3])
36-
data['energies'] = np.reshape(data['energies'], [nframes])
37-
data['forces'] = np.reshape(data['forces'], [nframes, -1, 3])
38-
if os.path.exists(os.path.join(folder, 'virial.raw')) :
39-
data['virials'] = np.loadtxt(os.path.join(folder, 'virial.raw'))
40-
data['virials'] = np.reshape(data['virials'], [nframes, 3, 3])
42+
if labels :
43+
if os.path.exists(os.path.join(folder, 'energy.raw')) :
44+
data['energies'] = np.loadtxt(os.path.join(folder, 'energy.raw'))
45+
data['energies'] = np.reshape(data['energies'], [nframes])
46+
if os.path.exists(os.path.join(folder, 'force.raw')) :
47+
data['forces'] = np.loadtxt(os.path.join(folder, 'force.raw'))
48+
data['forces'] = np.reshape(data['forces'], [nframes, -1, 3])
49+
if os.path.exists(os.path.join(folder, 'virial.raw')) :
50+
data['virials'] = np.loadtxt(os.path.join(folder, 'virial.raw'))
51+
data['virials'] = np.reshape(data['virials'], [nframes, 3, 3])
4152
return data
4253
else :
4354
raise RuntimeError('not dir ' + folder)
@@ -47,11 +58,14 @@ def dump (folder, data) :
4758
os.makedirs(folder, exist_ok = True)
4859
nframes = data['cells'].shape[0]
4960
np.savetxt(os.path.join(folder, 'type.raw'), data['atom_types'], fmt = '%d')
61+
np.savetxt(os.path.join(folder, 'type_map.raw'), data['atom_names'], fmt = '%s')
5062
np.savetxt(os.path.join(folder, 'box.raw'), np.reshape(data['cells'], [nframes, 9]))
5163
np.savetxt(os.path.join(folder, 'coord.raw'), np.reshape(data['coords'], [nframes, -1]))
52-
np.savetxt(os.path.join(folder, 'energy.raw'), np.reshape(data['energies'], [nframes, 1]))
53-
np.savetxt(os.path.join(folder, 'force.raw'), np.reshape(data['forces'], [nframes, -1]))
54-
if len(data['virials']) != 0 :
64+
if 'energies' in data :
65+
np.savetxt(os.path.join(folder, 'energy.raw'), np.reshape(data['energies'], [nframes, 1]))
66+
if 'forces' in data :
67+
np.savetxt(os.path.join(folder, 'force.raw'), np.reshape(data['forces'], [nframes, -1]))
68+
if 'virials' in data :
5569
np.savetxt(os.path.join(folder, 'virial.raw'), np.reshape(data['virials'], [nframes, 9]))
5670

5771

dpdata/gaussian/log.py

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,14 @@
1010

1111
symbols = ['X', 'H', 'He', 'Li', 'Be', 'B', 'C', 'N', 'O', 'F', 'Ne', 'Na', 'Mg', 'Al', 'Si', 'P', 'S', 'Cl', 'Ar', 'K', 'Ca', 'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn', 'Ga', 'Ge', 'As', 'Se', 'Br', 'Kr', 'Rb', 'Sr', 'Y', 'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'In', 'Sn', 'Sb', 'Te', 'I', 'Xe', 'Cs', 'Ba', 'La', 'Ce', 'Pr', 'Nd', 'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb', 'Lu', 'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg', 'Tl', 'Pb', 'Bi', 'Po', 'At', 'Rn', 'Fr', 'Ra', 'Ac', 'Th', 'Pa', 'U', 'Np', 'Pu', 'Am', 'Cm', 'Bk', 'Cf', 'Es', 'Fm', 'Md', 'No', 'Lr', 'Rf', 'Db', 'Sg', 'Bh', 'Hs', 'Mt', 'Ds', 'Rg', 'Cn', 'Nh', 'Fl', 'Mc', 'Lv', 'Ts', 'Og']
1212

13-
def to_system_data(file_name):
13+
def to_system_data(file_name, md=False):
1414
data = {}
1515
# read from log lines
1616
flag = 0
17-
energy = 0
18-
coords = []
17+
energy_t = []
18+
coords_t = []
1919
atom_symbols = []
20-
forces = []
20+
forces_t = []
2121

2222
with open(file_name) as fp:
2323
for line in fp:
@@ -27,7 +27,7 @@ def to_system_data(file_name):
2727
elif line.startswith(" Center Atomic Forces (Hartrees/Bohr)"):
2828
flag = 1
2929
forces = []
30-
elif line.startswith(" Input orientation:"):
30+
elif line.startswith(" Input orientation:") or line.startswith(" Z-Matrix orientation:"):
3131
flag = 5
3232
coords = []
3333
atom_symbols = []
@@ -37,6 +37,9 @@ def to_system_data(file_name):
3737
elif flag == 4:
3838
# forces
3939
if line.startswith(" -------"):
40+
forces_t.append(forces)
41+
energy_t.append(energy)
42+
coords_t.append(coords)
4043
flag = 0
4144
else:
4245
s = line.split()
@@ -50,15 +53,20 @@ def to_system_data(file_name):
5053
coords.append([float(x) for x in s[3:6]])
5154
atom_symbols.append(symbols[int(s[1])])
5255

53-
assert(coords), "cannot find coords"
54-
assert(energy), "cannot find energies"
55-
assert(forces), "cannot find forces"
56+
assert(coords_t), "cannot find coords"
57+
assert(energy_t), "cannot find energies"
58+
assert(forces_t), "cannot find forces"
5659

5760
atom_names, data['atom_types'], atom_numbs = np.unique(atom_symbols, return_inverse=True, return_counts=True)
5861
data['atom_names'] = list(atom_names)
5962
data['atom_numbs'] = list(atom_numbs)
60-
data['forces'] = np.array([forces]) * force_convert
61-
data['energies'] = np.array([energy]) * energy_convert
62-
data['coords'] = np.array([coords])
63+
if not md:
64+
forces_t = forces_t[-1:]
65+
energy_t = energy_t[-1:]
66+
coords_t = coords_t[-1:]
67+
data['forces'] = np.array(forces_t) * force_convert
68+
data['energies'] = np.array(energy_t) * energy_convert
69+
data['coords'] = np.array(coords_t)
6370
data['orig'] = np.array([0, 0, 0])
71+
data['cells'] = np.array([[[100., 0., 0.], [0., 100., 0.], [0., 0., 100.]] for _ in energy_t])
6472
return data

0 commit comments

Comments
 (0)