Skip to content

Commit b58aa63

Browse files
authored
add deepmd/hdf5 format (#203)
* add deepmd/hdf5 format To support deepmodeling/deepmd-kit#1163. * bugfix * add an example
1 parent 23e9f3b commit b58aa63

File tree

4 files changed

+243
-1
lines changed

4 files changed

+243
-1
lines changed

dpdata/deepmd/hdf5.py

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
"""Utils for deepmd/hdf5 format."""
2+
import h5py
3+
import numpy as np
4+
5+
from wcmatch.glob import globfilter
6+
7+
8+
__all__ = ['to_system_data', 'dump']
9+
10+
def to_system_data(f: h5py.File,
                   folder: str,
                   type_map: list = None,
                   labels: bool = True):
    """Load a system from a group of a HDF5 file.

    Parameters
    ----------
    f : h5py.File
        HDF5 file object
    folder : str
        path of the group in the HDF5 file; an empty value selects the
        root of the file
    type_map : list
        names of the atom types; only used when the group does not
        contain a ``type_map.raw`` dataset
    labels : bool
        if False, the labeled properties (energies, forces and virials)
        are skipped even when they are stored in the group

    Returns
    -------
    dict
        the system data

    Raises
    ------
    RuntimeError
        if a required dataset is missing from one of the sets
    """
    g = f[folder] if folder else f

    data = {}
    data['atom_types'] = g['type.raw'][:]
    ntypes = np.max(data['atom_types']) + 1
    natoms = data['atom_types'].size
    data['atom_numbs'] = [
        np.count_nonzero(data['atom_types'] == ii) for ii in range(ntypes)
    ]

    # if find type_map.raw, use it
    if 'type_map.raw' in g.keys():
        my_type_map = list(np.char.decode(g['type_map.raw'][:]))
    # else try to use arg type_map
    elif type_map is not None:
        my_type_map = type_map
    # in the last case, make artificial atom names
    else:
        my_type_map = ['Type_%d' % ii for ii in range(ntypes)]
    assert len(my_type_map) >= len(data['atom_numbs'])
    # keep one name per type index that can occur in atom_types
    data['atom_names'] = list(my_type_map[:len(data['atom_numbs'])])

    data['orig'] = np.zeros([3])
    if 'nopbc' in g.keys():
        data['nopbc'] = True
    sets = globfilter(g.keys(), 'set.*')

    data_types = {
        'cells': {'fn': 'box', 'labeled': False, 'shape': (3, 3), 'required': 'nopbc' not in data},
        'coords': {'fn': 'coord', 'labeled': False, 'shape': (natoms, 3), 'required': True},
        'energies': {'fn': 'energy', 'labeled': True, 'shape': tuple(), 'required': False},
        'forces': {'fn': 'force', 'labeled': True, 'shape': (natoms, 3), 'required': False},
        'virials': {'fn': 'virial', 'labeled': True, 'shape': (3, 3), 'required': False},
    }

    for dt, prop in data_types.items():
        # honor the ``labels`` switch: an unlabeled load must not pick up
        # energies/forces/virials that happen to be stored in the file
        if prop['labeled'] and not labels:
            continue

        fn = '%s.npy' % prop['fn']
        all_data = []
        for set_name in sets:
            set_group = g[set_name]
            if fn in set_group.keys():
                dd = set_group[fn][:]
                nframes = dd.shape[0]
                # restore the per-frame shape from the flattened storage
                all_data.append(np.reshape(dd, (nframes, *prop['shape'])))
            elif prop['required']:
                raise RuntimeError("%s/%s/%s not found" % (folder, set_name, fn))

        if len(all_data) > 0:
            data[dt] = np.concatenate(all_data, axis=0)
    return data
81+
82+
def dump(f: h5py.File,
         folder: str,
         data: dict,
         set_size = 5000,
         comp_prec = np.float32,
         ) -> None:
    """Dump data to a HDF5 file.

    Parameters
    ----------
    f : h5py.File
        HDF5 file object
    folder : str
        path in the HDF5 file; an empty value writes to the root of the
        file. An existing group at this path is deleted first.
    data : dict
        System or LabeledSystem data; ``data['cells']`` must be present
        because the number of frames is read from it
    set_size : int, default: 5000
        size of a set
    comp_prec : np.dtype, default: np.float32
        precision of data
    """
    # if folder is None, use the root of the file
    if folder:
        if folder in f:
            del f[folder]
        g = f.create_group(folder)
    else:
        g = f
    # dump raw (array in fact)
    g.create_dataset('type.raw', data=data['atom_types'])
    g.create_dataset('type_map.raw', data=np.array(data['atom_names'], dtype='S'))
    # BondOrder System
    if "bonds" in data:
        g.create_dataset("bonds.raw", data=data['bonds'])
    if "formal_charges" in data:
        g.create_dataset("formal_charges.raw", data=data['formal_charges'])
    # reshape frame properties and convert prec
    nframes = data['cells'].shape[0]

    nopbc = data.get("nopbc", False)

    data_types = {
        'cells': {'fn': 'box', 'shape': (nframes, 9), 'dump': not nopbc},
        'coords': {'fn': 'coord', 'shape': (nframes, -1), 'dump': True},
        'energies': {'fn': 'energy', 'shape': (nframes,), 'dump': True},
        'forces': {'fn': 'force', 'shape': (nframes, -1), 'dump': True},
        'virials': {'fn': 'virial', 'shape': (nframes, 9), 'dump': True},
    }
    # flatten every available property to one row per frame
    reshaped_data = {
        dt: np.reshape(data[dt], prop['shape']).astype(comp_prec)
        for dt, prop in data_types.items()
        if dt in data and prop['dump']
    }

    # dump frame properties: cell, coord, energy, force and virial
    nsets = nframes // set_size
    if set_size * nsets < nframes:
        # the last, partially filled set
        nsets += 1
    for ii in range(nsets):
        set_stt = ii * set_size
        set_end = (ii + 1) * set_size
        set_folder = g.create_group('set.%03d' % ii)
        for dt, prop in data_types.items():
            if dt in reshaped_data:
                set_folder.create_dataset('%s.npy' % prop['fn'],
                                          data=reshaped_data[dt][set_stt:set_end])

    if nopbc:
        # the second positional argument of create_dataset is the shape,
        # so the marker value has to be passed through ``data``
        g.create_dataset("nopbc", data=True)

dpdata/plugins/deepmd.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import dpdata.deepmd.raw
22
import dpdata.deepmd.comp
3+
import dpdata.deepmd.hdf5
34
import numpy as np
5+
import h5py
46
from dpdata.format import Format
57

68

@@ -54,3 +56,49 @@ def from_labeled_system(self, file_name, type_map=None, **kwargs):
5456
return dpdata.deepmd.comp.to_system_data(file_name, type_map=type_map, labels=True)
5557

5658
MultiMode = Format.MultiModes.Directory
59+
60+
@Format.register("deepmd/hdf5")
class DeePMDCompFormat(Format):
    """HDF5 format for DeePMD-kit.

    A system is addressed as ``<file>#<group>``: the part before the
    first ``#`` is the HDF5 file, the part after it is the group inside
    that file. Without a ``#`` the root of the file is used.

    Examples
    --------
    Dump a MultiSystems to a HDF5 file:
    >>> import dpdata
    >>> dpdata.MultiSystems().from_deepmd_npy("data").to_deepmd_hdf5("data.hdf5")
    """
    # NOTE(review): the class name looks like it may repeat the name of
    # another format class in this module -- confirm and rename if so.

    def from_system(self, file_name, type_map=None, **kwargs):
        """Load an unlabeled system from ``file_name``."""
        path, *rest = file_name.split("#")
        group = rest[0] if rest else ""
        with h5py.File(path, 'r') as f:
            return dpdata.deepmd.hdf5.to_system_data(f, group, type_map=type_map, labels=False)

    def from_labeled_system(self, file_name, type_map=None, **kwargs):
        """Load a labeled system from ``file_name``."""
        path, *rest = file_name.split("#")
        group = rest[0] if rest else ""
        with h5py.File(path, 'r') as f:
            return dpdata.deepmd.hdf5.to_system_data(f, group, type_map=type_map, labels=True)

    def to_system(self,
                  data : dict,
                  file_name : str,
                  set_size : int = 5000,
                  comp_prec : np.dtype = np.float32,
                  **kwargs):
        """Write ``data`` to ``file_name``.

        The file is opened in append mode when a group is given, so that
        several systems can share one file; otherwise it is opened in
        write mode.
        """
        path, *rest = file_name.split("#")
        group = rest[0] if rest else ""
        if group:
            mode = 'a'
        else:
            mode = 'w'
        with h5py.File(path, mode) as f:
            dpdata.deepmd.hdf5.dump(f, group, data, set_size = set_size, comp_prec = comp_prec)

    def from_multi_systems(self,
                           directory,
                           **kwargs):
        """Return one ``<file>#<group>`` name per top-level key of the file."""
        with h5py.File(directory, 'r') as f:
            names = []
            for key in f.keys():
                names.append("%s#%s" % (directory, key))
            return names

    def to_multi_systems(self,
                         formulas,
                         directory,
                         **kwargs):
        """Return one ``<file>#<formula>`` name per entry of ``formulas``."""
        names = []
        for formula in formulas:
            names.append("%s#%s" % (directory, formula))
        return names

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
readme = f.read()
1313

1414
# install_requires = ['xml']
15-
install_requires=['numpy>=1.14.3', 'monty', 'scipy']
15+
install_requires=['numpy>=1.14.3', 'monty', 'scipy', 'h5py', 'wcmatch']
1616

1717
setuptools.setup(
1818
name="dpdata",

tests/test_deepmd_hdf5.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
import os
2+
import numpy as np
3+
import unittest
4+
from context import dpdata
5+
from comp_sys import CompLabeledSys, CompSys, IsPBC
6+
7+
class TestDeepmdLoadDumpComp(unittest.TestCase, CompLabeledSys, IsPBC):
    """Dump a labeled system to deepmd/hdf5 and load it back."""

    def setUp (self) :
        self.system_1 = dpdata.LabeledSystem('poscars/OUTCAR.h2o.md',
                                             fmt = 'vasp/outcar')
        # the precision keyword of the deepmd/hdf5 writer is ``comp_prec``;
        # any other keyword is swallowed by **kwargs and has no effect
        self.system_1.to_deepmd_hdf5('tmp.deepmd.hdf5',
                                     comp_prec = np.float64,
                                     set_size = 2)

        self.system_2 = dpdata.LabeledSystem('tmp.deepmd.hdf5',
                                             fmt = 'deepmd/hdf5',
                                             type_map = ['O', 'H'])
        self.places = 6
        self.e_places = 6
        self.f_places = 6
        self.v_places = 6

    def tearDown(self) :
        # remove the file written by setUp
        if os.path.exists('tmp.deepmd.hdf5'):
            os.remove('tmp.deepmd.hdf5')
26+
27+
28+
class TestDeepmdCompNoLabels(unittest.TestCase, CompSys, IsPBC) :
    """Dump an unlabeled system to deepmd/hdf5 and load it back."""

    def setUp (self) :
        self.system_1 = dpdata.System('poscars/POSCAR.h2o.md',
                                      fmt = 'vasp/poscar')
        # the precision keyword of the deepmd/hdf5 writer is ``comp_prec``;
        # any other keyword is swallowed by **kwargs and has no effect
        self.system_1.to_deepmd_hdf5('tmp.deepmd.hdf5',
                                     comp_prec = np.float64,
                                     set_size = 2)
        self.system_2 = dpdata.System('tmp.deepmd.hdf5',
                                      fmt = 'deepmd/hdf5',
                                      type_map = ['O', 'H'])
        self.places = 6
        self.e_places = 6
        self.f_places = 6
        self.v_places = 6

    def tearDown(self) :
        # remove the file written by setUp
        if os.path.exists('tmp.deepmd.hdf5'):
            os.remove('tmp.deepmd.hdf5')

0 commit comments

Comments
 (0)