Skip to content

Commit ec284a2

Browse files
authored
Merge pull request #73 from deepmodeling/devel
Merge new features to master
2 parents 5785483 + 410ad7f commit ec284a2

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

54 files changed

+3454
-59
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,4 @@ build
1919
dist
2020
dpdata.egg-info
2121
_version.py
22+
!tests/cp2k/aimd/cp2k.log

README.md

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,9 +99,10 @@ xyz_multi_systems.to_deepmd_raw('./my_deepmd_data/')
9999
| deepmd | npy | True | True | LabeledSystem | 'deepmd/npy' |
100100
| gaussian| log | False | True | LabeledSystem | 'gaussian/log'|
101101
| gaussian| log | True | True | LabeledSystem | 'gaussian/md' |
102-
| siesta| output | False | True | LabeledSystem | 'siesta/output'|
103-
| siesta| aimd_output | True | True | LabeledSystem | 'siesta/aimd_output' |
102+
| siesta | output | False | True | LabeledSystem | 'siesta/output'|
103+
| siesta | aimd_output | True | True | LabeledSystem | 'siesta/aimd_output' |
104104
| cp2k | output | False | True | LabeledSystem | 'cp2k/output' |
105+
| cp2k | aimd_output | True | True | LabeledSystem | 'cp2k/aimd_output' |
105106
| QE | log | False | True | LabeledSystem | 'qe/pw/scf' |
106107
| QE | log | True | False | System | 'qe/cp/traj' |
107108
| QE | log | True | True | LabeledSystem | 'qe/cp/traj' |
@@ -152,3 +153,20 @@ Frame selection can be implemented by
152153
dpdata.LabeledSystem('OUTCAR').sub_system([0,-1]).to_deepmd_raw('dpmd_raw')
153154
```
154155
by which only the first and last frames are dumped to `dpmd_raw`.
156+
157+
## replicate
158+
dpdata will create a super cell of the current atom configuration.
159+
```python
160+
dpdata.System('./POSCAR').replicate((1,2,3,) )
161+
```
162+
tuple(1,2,3) means don't copy atom configuration in x direction, make 2 copys in y direction, make 3 copys in z direction.
163+
164+
## perturb
165+
By the following example, each frame of the original system (`dpdata.System('./POSCAR')`) is perturbed to generate three new frames. For each frame, the cell is perturbed by 5% and the atom positions are perturbed by 0.6 Angstrom. `atom_pert_style` indicates that the perturbation to the atom positions is subject to normal distribution. Other available options to `atom_pert_style` are`uniform` (uniform in a ball), and `const` (uniform on a sphere).
166+
```python
167+
perturbed_system = dpdata.System('./POSCAR').perturb(pert_num=3,
168+
cell_pert_fraction=0.05,
169+
atom_pert_distance=0.6,
170+
atom_pert_style='normal')
171+
print(perturbed_system.data)
172+
```

dpdata/cp2k/__init__.py

Whitespace-only changes.

dpdata/cp2k/cell.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
2+
#%%
3+
import numpy as np
4+
from collections import OrderedDict
5+
import re
6+
7+
def cell_to_low_triangle(A,B,C,alpha,beta,gamma):
8+
"""
9+
Convert cell to low triangle matrix.
10+
11+
Parameters
12+
----------
13+
A : float
14+
cell length A
15+
B : float
16+
cell length B
17+
C : float
18+
cell length C
19+
alpha : float
20+
radian. The angle between vector B and vector C.
21+
beta : float
22+
radian. The angle between vector A and vector C.
23+
gamma : float
24+
radian. The angle between vector B and vector C.
25+
26+
Returns
27+
-------
28+
cell : list
29+
The cell matrix used by dpdata in low triangle form.
30+
"""
31+
if not np.pi*5/180<alpha< np.pi*175/180:
32+
raise RuntimeError("alpha=={}: must be a radian, and \
33+
must be in np.pi*5/180 < alpha < np.pi*175/180".format(alpha))
34+
if not np.pi*5/180<beta< np.pi*175/180:
35+
raise RuntimeError("beta=={}: must be a radian, and \
36+
must be in np.pi*5/180 < beta < np.pi*175/180".format(beta))
37+
if not np.pi*5/180<gamma< np.pi*175/180:
38+
raise RuntimeError("gamma=={}: must be a radian, and \
39+
must be in np.pi*5/180 < gamma < np.pi*175/180".format(gamma))
40+
if not A > 0.2:
41+
raise RuntimeError("A=={}, must be greater than 0.2".format(A))
42+
if not B > 0.2:
43+
raise RuntimeError("B=={}, must be greater than 0.2".format(B))
44+
if not C > 0.2:
45+
raise RuntimeError("C=={}, must be greater than 0.2".format(C))
46+
47+
lx = A
48+
xy = B * np.cos(gamma)
49+
xz = C * np.cos(beta)
50+
ly = B* np.sin(gamma)
51+
if not ly > 0.1:
52+
raise RuntimeError("ly:=B* np.sin(gamma)=={}, must be greater than 0.1",format(ly))
53+
yz = (B*C*np.cos(alpha)-xy*xz)/ly
54+
if not C**2-xz**2-yz**2 > 0.01:
55+
raise RuntimeError("lz^2:=C**2-xz**2-yz**2=={}, must be greater than 0.01",format(C**2-xz**2-yz**2))
56+
lz = np.sqrt(C**2-xz**2-yz**2)
57+
cell = np.asarray([[lx, 0 , 0],
58+
[xy, ly, 0 ],
59+
[xz, yz, lz]]).astype('float32')
60+
return cell
61+

dpdata/cp2k/output.py

Lines changed: 211 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,217 @@
1+
#%%
12
import numpy as np
3+
import re
4+
from collections import OrderedDict
5+
from .cell import cell_to_low_triangle
26

7+
#%%
8+
AU_TO_ANG = 5.29177208590000E-01
9+
AU_TO_EV = 2.72113838565563E+01
10+
AU_TO_EV_EVERY_ANG = AU_TO_EV/AU_TO_ANG
11+
delimiter_patterns=[]
12+
delimiter_p1 = re.compile(r'^ \* GO CP2K GO! \*+')
13+
delimiter_p2 = re.compile(r'^ \*+')
14+
delimiter_patterns.append(delimiter_p1)
15+
delimiter_patterns.append(delimiter_p2)
16+
avail_patterns = []
17+
18+
avail_patterns.append(re.compile(r'^ INITIAL POTENTIAL ENERGY'))
19+
avail_patterns.append(re.compile(r'^ ENSEMBLE TYPE'))
20+
21+
class Cp2kSystems(object):
22+
"""
23+
deal with cp2k outputfile
24+
"""
25+
def __init__(self, log_file_name, xyz_file_name):
26+
self.log_file_object = open(log_file_name, 'r')
27+
self.xyz_file_object = open(xyz_file_name, 'r')
28+
self.log_block_generator = self.get_log_block_generator()
29+
self.xyz_block_generator = self.get_xyz_block_generator()
30+
self.cell=None
31+
def __del__(self):
32+
self.log_file_object.close()
33+
self.xyz_file_object.close()
34+
def __iter__(self):
35+
return self
36+
def __next__(self):
37+
info_dict = {}
38+
log_info_dict = self.handle_single_log_frame(next(self.log_block_generator))
39+
xyz_info_dict = self.handle_single_xyz_frame(next(self.xyz_block_generator))
40+
eq1 = [v1==v2 for v1,v2 in zip(log_info_dict['atom_numbs'], xyz_info_dict['atom_numbs'])]
41+
eq2 = [v1==v2 for v1,v2 in zip(log_info_dict['atom_names'], xyz_info_dict['atom_names'])]
42+
eq3 = [v1==v2 for v1,v2 in zip(log_info_dict['atom_types'], xyz_info_dict['atom_types'])]
43+
assert all(eq1), (log_info_dict,xyz_info_dict,'There may be errors in the file')
44+
assert all(eq2), (log_info_dict,xyz_info_dict,'There may be errors in the file')
45+
assert all(eq3), (log_info_dict,xyz_info_dict,'There may be errors in the file')
46+
assert log_info_dict['energies']==xyz_info_dict['energies'], (log_info_dict['energies'],xyz_info_dict['energies'],'There may be errors in the file')
47+
info_dict.update(log_info_dict)
48+
info_dict.update(xyz_info_dict)
49+
return info_dict
50+
51+
def get_log_block_generator(self):
52+
lines = []
53+
delimiter_flag = False
54+
while True:
55+
line = self.log_file_object.readline()
56+
if line:
57+
lines.append(line)
58+
if any(p.match(line) for p in delimiter_patterns):
59+
if delimiter_flag is True:
60+
yield lines
61+
lines = []
62+
delimiter_flag = False
63+
else:
64+
line = self.log_file_object.readline()
65+
lines.append(line)
66+
if any(p.match(line) for p in avail_patterns):
67+
delimiter_flag = True
68+
else:
69+
break
70+
if delimiter_flag is True:
71+
raise RuntimeError('This file lacks some content, please check')
72+
73+
def get_xyz_block_generator(self):
74+
p3 = re.compile(r'^\s*(\d+)\s*')
75+
while True:
76+
line = self.xyz_file_object.readline()
77+
if not line:
78+
break
79+
if p3.match(line):
80+
atom_num = int(p3.match(line).group(1))
81+
lines = []
82+
lines.append(line)
83+
for ii in range(atom_num+1):
84+
lines.append(self.xyz_file_object.readline())
85+
if not lines[-1]:
86+
raise RuntimeError("this xyz file may lack of lines, should be {};lines:{}".format(atom_num+2, lines))
87+
yield lines
88+
89+
def handle_single_log_frame(self, lines):
90+
info_dict={}
91+
energy_pattern_1 = re.compile(r' INITIAL POTENTIAL ENERGY\[hartree\]\s+=\s+(?P<number>\S+)')
92+
# CONSERVED QUANTITY [hartree] = -0.279168013085E+04
93+
energy_pattern_2 = re.compile(r' POTENTIAL ENERGY\[hartree\]\s+=\s+(?P<number>\S+)')
94+
energy=None
95+
cell_length_pattern = re.compile(r' INITIAL CELL LNTHS\[bohr\]\s+=\s+(?P<A>\S+)\s+(?P<B>\S+)\s+(?P<C>\S+)')
96+
cell_angle_pattern = re.compile(r' INITIAL CELL ANGLS\[deg\]\s+=\s+(?P<alpha>\S+)\s+(?P<beta>\S+)\s+(?P<gamma>\S+)')
97+
cell_A, cell_B, cell_C = (0,0,0,)
98+
cell_alpha, cell_beta, cell_gamma=(0,0,0,)
99+
force_start_pattern = re.compile(r' ATOMIC FORCES in')
100+
force_flag=False
101+
force_end_pattern = re.compile(r' SUM OF ATOMIC FORCES')
102+
force_lines= []
103+
cell_flag=0
104+
for line in lines:
105+
if force_start_pattern.match(line):
106+
force_flag=True
107+
if force_end_pattern.match(line):
108+
assert force_flag is True, (force_flag,'there may be errors in this file ')
109+
force_flag=False
110+
if force_flag is True:
111+
force_lines.append(line)
112+
if energy_pattern_1.match(line):
113+
energy = float(energy_pattern_1.match(line).groupdict()['number']) * AU_TO_EV
114+
if energy_pattern_2.match(line):
115+
energy = float(energy_pattern_2.match(line).groupdict()['number']) * AU_TO_EV
116+
if cell_length_pattern.match(line):
117+
cell_A = float(cell_length_pattern.match(line).groupdict()['A']) * AU_TO_ANG
118+
cell_B = float(cell_length_pattern.match(line).groupdict()['B']) * AU_TO_ANG
119+
cell_C = float(cell_length_pattern.match(line).groupdict()['C']) * AU_TO_ANG
120+
cell_flag+=1
121+
if cell_angle_pattern.match(line):
122+
cell_alpha = np.deg2rad(float(cell_angle_pattern.match(line).groupdict()['alpha']))
123+
cell_beta = np.deg2rad(float(cell_angle_pattern.match(line).groupdict()['beta']))
124+
cell_gamma = np.deg2rad(float(cell_angle_pattern.match(line).groupdict()['gamma']))
125+
cell_flag+=1
126+
if cell_flag == 2:
127+
self.cell = cell_to_low_triangle(cell_A,cell_B,cell_C,
128+
cell_alpha,cell_beta,cell_gamma)
129+
# lx = cell_A
130+
# xy = cell_B * np.cos(cell_gamma)
131+
# xz = cell_C * np.cos(cell_beta)
132+
# ly = cell_B* np.sin(cell_gamma)
133+
# yz = (cell_B*cell_C*np.cos(cell_alpha)-xy*xz)/ly
134+
# lz = np.sqrt(cell_C**2-xz**2-yz**2)
135+
# self.cell = [[lx, 0 , 0],
136+
# [xy, ly, 0 ],
137+
# [xz, yz, lz]]
138+
139+
element_index = -1
140+
element_dict = OrderedDict()
141+
atom_types_list = []
142+
forces_list = []
143+
for line in force_lines[3:]:
144+
line_list = line.split()
145+
if element_dict.get(line_list[2]):
146+
element_dict[line_list[2]][1]+=1
147+
else:
148+
element_index +=1
149+
element_dict[line_list[2]]=[element_index,1]
150+
atom_types_list.append(element_dict[line_list[2]][0])
151+
forces_list.append([float(line_list[3])*AU_TO_EV_EVERY_ANG,
152+
float(line_list[4])*AU_TO_EV_EVERY_ANG,
153+
float(line_list[5])*AU_TO_EV_EVERY_ANG])
154+
155+
atom_names=list(element_dict.keys())
156+
atom_numbs=[]
157+
for ii in atom_names:
158+
atom_numbs.append(element_dict[ii][1])
159+
info_dict['atom_names'] = atom_names
160+
info_dict['atom_numbs'] = atom_numbs
161+
info_dict['atom_types'] = np.asarray(atom_types_list)
162+
info_dict['cells'] = np.asarray([self.cell]).astype('float32')
163+
info_dict['energies'] = np.asarray([energy]).astype('float32')
164+
info_dict['forces'] = np.asarray([forces_list]).astype('float32')
165+
return info_dict
166+
167+
def handle_single_xyz_frame(self, lines):
168+
info_dict = {}
169+
atom_num = int(lines[0].strip('\n').strip())
170+
if len(lines) != atom_num + 2:
171+
raise RuntimeError("format error, atom_num=={}, {}!=atom_num+2".format(atom_num, len(lines)))
172+
data_format_line = lines[1].strip('\n').strip()+str(' ')
173+
prop_pattern = re.compile(r'(?P<prop>\w+)\s*=\s*(?P<number>.*?)[, ]')
174+
prop_dict = dict(prop_pattern.findall(data_format_line))
175+
176+
energy=0
177+
if prop_dict.get('E'):
178+
energy = float(prop_dict.get('E')) * AU_TO_EV
179+
# info_dict['energies'] = np.array([prop_dict['E']]).astype('float32')
180+
181+
element_index = -1
182+
element_dict = OrderedDict()
183+
atom_types_list = []
184+
coords_list = []
185+
for line in lines[2:]:
186+
line_list = line.split()
187+
if element_dict.get(line_list[0]):
188+
element_dict[line_list[0]][1]+=1
189+
else:
190+
element_index +=1
191+
element_dict[line_list[0]]=[element_index,1]
192+
atom_types_list.append(element_dict[line_list[0]][0])
193+
coords_list.append([float(line_list[1])*AU_TO_ANG,
194+
float(line_list[2])*AU_TO_ANG,
195+
float(line_list[3])*AU_TO_ANG])
196+
atom_names=list(element_dict.keys())
197+
atom_numbs=[]
198+
for ii in atom_names:
199+
atom_numbs.append(element_dict[ii][1])
200+
info_dict['atom_names'] = atom_names
201+
info_dict['atom_numbs'] = atom_numbs
202+
info_dict['atom_types'] = np.asarray(atom_types_list)
203+
info_dict['coords'] = np.asarray([coords_list]).astype('float32')
204+
info_dict['energies'] = np.array([energy]).astype('float32')
205+
info_dict['orig']=[0,0,0]
206+
return info_dict
207+
208+
#%%
3209

4210
def get_frames (fname) :
5211
coord_flag = False
6212
force_flag = False
7-
eV = 2.72113838565563E+01
8-
angstrom = 5.29177208590000E-01
213+
eV = 2.72113838565563E+01 # hatree to eV
214+
angstrom = 5.29177208590000E-01 # Bohrto Angstrom
9215
fp = open(fname)
10216
atom_symbol_list = []
11217
cell = []
@@ -74,3 +280,6 @@ def get_frames (fname) :
74280
return list(atom_names), atom_numbs, atom_types, cell, coord, energy, force
75281

76282

283+
284+
285+
# %%

dpdata/deepmd/__init__.py

Whitespace-only changes.

dpdata/deepmd/comp.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ def to_system_data(folder,
4848
data['forces'] = np.concatenate(all_forces, axis = 0)
4949
if len(all_virs) > 0:
5050
data['virials'] = np.concatenate(all_virs, axis = 0)
51+
if os.path.isfile(os.path.join(folder, "nopbc")):
52+
data['nopbc'] = True
5153
return data
5254

5355

@@ -101,5 +103,10 @@ def dump(folder,
101103
np.save(os.path.join(set_folder, 'virial'), virials[set_stt:set_end])
102104
if 'atom_pref' in data:
103105
np.save(os.path.join(set_folder, "atom_pref"), atom_pref[set_stt:set_end])
104-
106+
try:
107+
os.remove(os.path.join(folder, "nopbc"))
108+
except OSError:
109+
pass
110+
if data.get("nopbc", False):
111+
os.mknod(os.path.join(folder, "nopbc"))
105112

dpdata/deepmd/raw.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ def to_system_data(folder, type_map = None, labels = True) :
4949
if os.path.exists(os.path.join(folder, 'virial.raw')) :
5050
data['virials'] = np.loadtxt(os.path.join(folder, 'virial.raw'))
5151
data['virials'] = np.reshape(data['virials'], [nframes, 3, 3])
52+
if os.path.isfile(os.path.join(folder, "nopbc")):
53+
data['nopbc'] = True
5254
return data
5355
else :
5456
raise RuntimeError('not dir ' + folder)
@@ -67,5 +69,10 @@ def dump (folder, data) :
6769
np.savetxt(os.path.join(folder, 'force.raw'), np.reshape(data['forces'], [nframes, -1]))
6870
if 'virials' in data :
6971
np.savetxt(os.path.join(folder, 'virial.raw'), np.reshape(data['virials'], [nframes, 9]))
70-
72+
try:
73+
os.remove(os.path.join(folder, "nopbc"))
74+
except OSError:
75+
pass
76+
if data.get("nopbc", False):
77+
os.mknod(os.path.join(folder, "nopbc"))
7178

dpdata/gaussian/__init__.py

Whitespace-only changes.

dpdata/gaussian/log.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,4 +69,5 @@ def to_system_data(file_name, md=False):
6969
data['coords'] = np.array(coords_t)
7070
data['orig'] = np.array([0, 0, 0])
7171
data['cells'] = np.array([[[100., 0., 0.], [0., 100., 0.], [0., 0., 100.]] for _ in energy_t])
72+
data['nopbc'] = True
7273
return data

0 commit comments

Comments
 (0)