Skip to content

Commit 6b0c957

Browse files
authored
Merge pull request #71 from felix5572/devel
support cp2k/aimd_output
2 parents 8dac1f5 + 2bec372 commit 6b0c957

20 files changed

+2729
-4
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,4 @@ build
1919
dist
2020
dpdata.egg-info
2121
_version.py
22+
!tests/cp2k/aimd/cp2k.log

README.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,9 +99,10 @@ xyz_multi_systems.to_deepmd_raw('./my_deepmd_data/')
9999
| deepmd | npy | True | True | LabeledSystem | 'deepmd/npy' |
100100
| gaussian| log | False | True | LabeledSystem | 'gaussian/log'|
101101
| gaussian| log | True | True | LabeledSystem | 'gaussian/md' |
102-
| siesta| output | False | True | LabeledSystem | 'siesta/output'|
103-
| siesta| aimd_output | True | True | LabeledSystem | 'siesta/aimd_output' |
102+
| siesta | output | False | True | LabeledSystem | 'siesta/output'|
103+
| siesta | aimd_output | True | True | LabeledSystem | 'siesta/aimd_output' |
104104
| cp2k | output | False | True | LabeledSystem | 'cp2k/output' |
105+
| cp2k | aimd_output | True | True | LabeledSystem | 'cp2k/aimd_output' |
105106
| QE | log | False | True | LabeledSystem | 'qe/pw/scf' |
106107
| QE | log | True | False | System | 'qe/cp/traj' |
107108
| QE | log | True | True | LabeledSystem | 'qe/cp/traj' |

dpdata/cp2k/__init__.py

Whitespace-only changes.

dpdata/cp2k/cell.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
2+
#%%
3+
import numpy as np
4+
from collections import OrderedDict
5+
import re
6+
7+
def cell_to_low_triangle(A,B,C,alpha,beta,gamma):
8+
"""
9+
Convert cell to low triangle matrix.
10+
11+
Parameters
12+
----------
13+
A : float
14+
cell length A
15+
B : float
16+
cell length B
17+
C : float
18+
cell length C
19+
alpha : float
20+
radian. The angle between vector B and vector C.
21+
beta : float
22+
radian. The angle between vector A and vector C.
23+
gamma : float
24+
radian. The angle between vector B and vector C.
25+
26+
Returns
27+
-------
28+
cell : list
29+
The cell matrix used by dpdata in low triangle form.
30+
"""
31+
if not np.pi*5/180<alpha< np.pi*175/180:
32+
raise RuntimeError("alpha=={}: must be a radian, and \
33+
must be in np.pi*5/180 < alpha < np.pi*175/180".format(alpha))
34+
if not np.pi*5/180<beta< np.pi*175/180:
35+
raise RuntimeError("beta=={}: must be a radian, and \
36+
must be in np.pi*5/180 < beta < np.pi*175/180".format(beta))
37+
if not np.pi*5/180<gamma< np.pi*175/180:
38+
raise RuntimeError("gamma=={}: must be a radian, and \
39+
must be in np.pi*5/180 < gamma < np.pi*175/180".format(gamma))
40+
if not A > 0.2:
41+
raise RuntimeError("A=={}, must be greater than 0.2".format(A))
42+
if not B > 0.2:
43+
raise RuntimeError("B=={}, must be greater than 0.2".format(B))
44+
if not C > 0.2:
45+
raise RuntimeError("C=={}, must be greater than 0.2".format(C))
46+
47+
lx = A
48+
xy = B * np.cos(gamma)
49+
xz = C * np.cos(beta)
50+
ly = B* np.sin(gamma)
51+
if not ly > 0.1:
52+
raise RuntimeError("ly:=B* np.sin(gamma)=={}, must be greater than 0.1",format(ly))
53+
yz = (B*C*np.cos(alpha)-xy*xz)/ly
54+
if not C**2-xz**2-yz**2 > 0.01:
55+
raise RuntimeError("lz^2:=C**2-xz**2-yz**2=={}, must be greater than 0.01",format(C**2-xz**2-yz**2))
56+
lz = np.sqrt(C**2-xz**2-yz**2)
57+
cell = np.asarray([[lx, 0 , 0],
58+
[xy, ly, 0 ],
59+
[xz, yz, lz]]).astype('float32')
60+
return cell
61+

dpdata/cp2k/output.py

Lines changed: 211 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,217 @@
1+
#%%
12
import numpy as np
3+
import re
4+
from collections import OrderedDict
5+
from .cell import cell_to_low_triangle
26

7+
#%%
8+
AU_TO_ANG = 5.29177208590000E-01
9+
AU_TO_EV = 2.72113838565563E+01
10+
AU_TO_EV_EVERY_ANG = AU_TO_EV/AU_TO_ANG
11+
delimiter_patterns=[]
12+
delimiter_p1 = re.compile(r'^ \* GO CP2K GO! \*+')
13+
delimiter_p2 = re.compile(r'^ \*+')
14+
delimiter_patterns.append(delimiter_p1)
15+
delimiter_patterns.append(delimiter_p2)
16+
avail_patterns = []
17+
18+
avail_patterns.append(re.compile(r'^ INITIAL POTENTIAL ENERGY'))
19+
avail_patterns.append(re.compile(r'^ ENSEMBLE TYPE'))
20+
21+
class Cp2kSystems(object):
22+
"""
23+
deal with cp2k outputfile
24+
"""
25+
def __init__(self, log_file_name, xyz_file_name):
26+
self.log_file_object = open(log_file_name, 'r')
27+
self.xyz_file_object = open(xyz_file_name, 'r')
28+
self.log_block_generator = self.get_log_block_generator()
29+
self.xyz_block_generator = self.get_xyz_block_generator()
30+
self.cell=None
31+
def __del__(self):
32+
self.log_file_object.close()
33+
self.xyz_file_object.close()
34+
def __iter__(self):
35+
return self
36+
def __next__(self):
37+
info_dict = {}
38+
log_info_dict = self.handle_single_log_frame(next(self.log_block_generator))
39+
xyz_info_dict = self.handle_single_xyz_frame(next(self.xyz_block_generator))
40+
eq1 = [v1==v2 for v1,v2 in zip(log_info_dict['atom_numbs'], xyz_info_dict['atom_numbs'])]
41+
eq2 = [v1==v2 for v1,v2 in zip(log_info_dict['atom_names'], xyz_info_dict['atom_names'])]
42+
eq3 = [v1==v2 for v1,v2 in zip(log_info_dict['atom_types'], xyz_info_dict['atom_types'])]
43+
assert all(eq1), (log_info_dict,xyz_info_dict,'There may be errors in the file')
44+
assert all(eq2), (log_info_dict,xyz_info_dict,'There may be errors in the file')
45+
assert all(eq3), (log_info_dict,xyz_info_dict,'There may be errors in the file')
46+
assert log_info_dict['energies']==xyz_info_dict['energies'], (log_info_dict['energies'],xyz_info_dict['energies'],'There may be errors in the file')
47+
info_dict.update(log_info_dict)
48+
info_dict.update(xyz_info_dict)
49+
return info_dict
50+
51+
def get_log_block_generator(self):
52+
lines = []
53+
delimiter_flag = False
54+
while True:
55+
line = self.log_file_object.readline()
56+
if line:
57+
lines.append(line)
58+
if any(p.match(line) for p in delimiter_patterns):
59+
if delimiter_flag is True:
60+
yield lines
61+
lines = []
62+
delimiter_flag = False
63+
else:
64+
line = self.log_file_object.readline()
65+
lines.append(line)
66+
if any(p.match(line) for p in avail_patterns):
67+
delimiter_flag = True
68+
else:
69+
break
70+
if delimiter_flag is True:
71+
raise RuntimeError('This file lacks some content, please check')
72+
73+
def get_xyz_block_generator(self):
74+
p3 = re.compile(r'^\s*(\d+)\s*')
75+
while True:
76+
line = self.xyz_file_object.readline()
77+
if not line:
78+
break
79+
if p3.match(line):
80+
atom_num = int(p3.match(line).group(1))
81+
lines = []
82+
lines.append(line)
83+
for ii in range(atom_num+1):
84+
lines.append(self.xyz_file_object.readline())
85+
if not lines[-1]:
86+
raise RuntimeError("this xyz file may lack of lines, should be {};lines:{}".format(atom_num+2, lines))
87+
yield lines
88+
89+
def handle_single_log_frame(self, lines):
90+
info_dict={}
91+
energy_pattern_1 = re.compile(r' INITIAL POTENTIAL ENERGY\[hartree\]\s+=\s+(?P<number>\S+)')
92+
# CONSERVED QUANTITY [hartree] = -0.279168013085E+04
93+
energy_pattern_2 = re.compile(r' POTENTIAL ENERGY\[hartree\]\s+=\s+(?P<number>\S+)')
94+
energy=None
95+
cell_length_pattern = re.compile(r' INITIAL CELL LNTHS\[bohr\]\s+=\s+(?P<A>\S+)\s+(?P<B>\S+)\s+(?P<C>\S+)')
96+
cell_angle_pattern = re.compile(r' INITIAL CELL ANGLS\[deg\]\s+=\s+(?P<alpha>\S+)\s+(?P<beta>\S+)\s+(?P<gamma>\S+)')
97+
cell_A, cell_B, cell_C = (0,0,0,)
98+
cell_alpha, cell_beta, cell_gamma=(0,0,0,)
99+
force_start_pattern = re.compile(r' ATOMIC FORCES in')
100+
force_flag=False
101+
force_end_pattern = re.compile(r' SUM OF ATOMIC FORCES')
102+
force_lines= []
103+
cell_flag=0
104+
for line in lines:
105+
if force_start_pattern.match(line):
106+
force_flag=True
107+
if force_end_pattern.match(line):
108+
assert force_flag is True, (force_flag,'there may be errors in this file ')
109+
force_flag=False
110+
if force_flag is True:
111+
force_lines.append(line)
112+
if energy_pattern_1.match(line):
113+
energy = float(energy_pattern_1.match(line).groupdict()['number']) * AU_TO_EV
114+
if energy_pattern_2.match(line):
115+
energy = float(energy_pattern_2.match(line).groupdict()['number']) * AU_TO_EV
116+
if cell_length_pattern.match(line):
117+
cell_A = float(cell_length_pattern.match(line).groupdict()['A']) * AU_TO_ANG
118+
cell_B = float(cell_length_pattern.match(line).groupdict()['B']) * AU_TO_ANG
119+
cell_C = float(cell_length_pattern.match(line).groupdict()['C']) * AU_TO_ANG
120+
cell_flag+=1
121+
if cell_angle_pattern.match(line):
122+
cell_alpha = np.deg2rad(float(cell_angle_pattern.match(line).groupdict()['alpha']))
123+
cell_beta = np.deg2rad(float(cell_angle_pattern.match(line).groupdict()['beta']))
124+
cell_gamma = np.deg2rad(float(cell_angle_pattern.match(line).groupdict()['gamma']))
125+
cell_flag+=1
126+
if cell_flag == 2:
127+
self.cell = cell_to_low_triangle(cell_A,cell_B,cell_C,
128+
cell_alpha,cell_beta,cell_gamma)
129+
# lx = cell_A
130+
# xy = cell_B * np.cos(cell_gamma)
131+
# xz = cell_C * np.cos(cell_beta)
132+
# ly = cell_B* np.sin(cell_gamma)
133+
# yz = (cell_B*cell_C*np.cos(cell_alpha)-xy*xz)/ly
134+
# lz = np.sqrt(cell_C**2-xz**2-yz**2)
135+
# self.cell = [[lx, 0 , 0],
136+
# [xy, ly, 0 ],
137+
# [xz, yz, lz]]
138+
139+
element_index = -1
140+
element_dict = OrderedDict()
141+
atom_types_list = []
142+
forces_list = []
143+
for line in force_lines[3:]:
144+
line_list = line.split()
145+
if element_dict.get(line_list[2]):
146+
element_dict[line_list[2]][1]+=1
147+
else:
148+
element_index +=1
149+
element_dict[line_list[2]]=[element_index,1]
150+
atom_types_list.append(element_dict[line_list[2]][0])
151+
forces_list.append([float(line_list[3])*AU_TO_EV_EVERY_ANG,
152+
float(line_list[4])*AU_TO_EV_EVERY_ANG,
153+
float(line_list[5])*AU_TO_EV_EVERY_ANG])
154+
155+
atom_names=list(element_dict.keys())
156+
atom_numbs=[]
157+
for ii in atom_names:
158+
atom_numbs.append(element_dict[ii][1])
159+
info_dict['atom_names'] = atom_names
160+
info_dict['atom_numbs'] = atom_numbs
161+
info_dict['atom_types'] = np.asarray(atom_types_list)
162+
info_dict['cells'] = np.asarray([self.cell]).astype('float32')
163+
info_dict['energies'] = np.asarray([energy]).astype('float32')
164+
info_dict['forces'] = np.asarray([forces_list]).astype('float32')
165+
return info_dict
166+
167+
def handle_single_xyz_frame(self, lines):
168+
info_dict = {}
169+
atom_num = int(lines[0].strip('\n').strip())
170+
if len(lines) != atom_num + 2:
171+
raise RuntimeError("format error, atom_num=={}, {}!=atom_num+2".format(atom_num, len(lines)))
172+
data_format_line = lines[1].strip('\n').strip()+str(' ')
173+
prop_pattern = re.compile(r'(?P<prop>\w+)\s*=\s*(?P<number>.*?)[, ]')
174+
prop_dict = dict(prop_pattern.findall(data_format_line))
175+
176+
energy=0
177+
if prop_dict.get('E'):
178+
energy = float(prop_dict.get('E')) * AU_TO_EV
179+
# info_dict['energies'] = np.array([prop_dict['E']]).astype('float32')
180+
181+
element_index = -1
182+
element_dict = OrderedDict()
183+
atom_types_list = []
184+
coords_list = []
185+
for line in lines[2:]:
186+
line_list = line.split()
187+
if element_dict.get(line_list[0]):
188+
element_dict[line_list[0]][1]+=1
189+
else:
190+
element_index +=1
191+
element_dict[line_list[0]]=[element_index,1]
192+
atom_types_list.append(element_dict[line_list[0]][0])
193+
coords_list.append([float(line_list[1])*AU_TO_ANG,
194+
float(line_list[2])*AU_TO_ANG,
195+
float(line_list[3])*AU_TO_ANG])
196+
atom_names=list(element_dict.keys())
197+
atom_numbs=[]
198+
for ii in atom_names:
199+
atom_numbs.append(element_dict[ii][1])
200+
info_dict['atom_names'] = atom_names
201+
info_dict['atom_numbs'] = atom_numbs
202+
info_dict['atom_types'] = np.asarray(atom_types_list)
203+
info_dict['coords'] = np.asarray([coords_list]).astype('float32')
204+
info_dict['energies'] = np.array([energy]).astype('float32')
205+
info_dict['orig']=[0,0,0]
206+
return info_dict
207+
208+
#%%
3209

4210
def get_frames (fname) :
5211
coord_flag = False
6212
force_flag = False
7-
eV = 2.72113838565563E+01
8-
angstrom = 5.29177208590000E-01
213+
eV = 2.72113838565563E+01 # hatree to eV
214+
angstrom = 5.29177208590000E-01 # Bohrto Angstrom
9215
fp = open(fname)
10216
atom_symbol_list = []
11217
cell = []
@@ -74,3 +280,6 @@ def get_frames (fname) :
74280
return list(atom_names), atom_numbs, atom_types, cell, coord, energy, force
75281

76282

283+
284+
285+
# %%

dpdata/system.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import dpdata.md.pbc
1717
import dpdata.gaussian.log
1818
import dpdata.cp2k.output
19+
from dpdata.cp2k.output import Cp2kSystems
1920
from copy import deepcopy
2021
from monty.json import MSONable
2122
from monty.serialization import loadfn,dumpfn
@@ -816,6 +817,7 @@ def __init__ (self,
816817
- ``gaussian/log``: gaussian logs
817818
- ``gaussian/md``: gaussian ab initio molecular dynamics
818819
- ``cp2k/output``: cp2k output file
820+
- ``cp2k/aimd_output``: cp2k aimd output dir(contains *pos*.xyz and *.log file)
819821
820822
type_map : list of str
821823
Needed by formats deepmd/raw and deepmd/npy. Maps atom type to name. The atom with type `ii` is mapped to `type_map[ii]`.
@@ -858,6 +860,8 @@ def __init__ (self,
858860
self.from_gaussian_log(file_name, md=True)
859861
elif fmt == 'cp2k/output':
860862
self.from_cp2k_output(file_name)
863+
elif fmt == 'cp2k/aimd_output':
864+
self.from_cp2k_aimd_output(file_dir=file_name)
861865
else :
862866
raise RuntimeError('unknow data format ' + fmt)
863867

@@ -901,6 +905,14 @@ def has_virial(self) :
901905
# return ('virials' in self.data) and (len(self.data['virials']) > 0)
902906
return ('virials' in self.data)
903907

908+
909+
def from_cp2k_aimd_output(self, file_dir):
910+
xyz_file=glob.glob("{}/*pos*.xyz".format(file_dir))[0]
911+
log_file=glob.glob("{}/*.log".format(file_dir))[0]
912+
for info_dict in Cp2kSystems(log_file, xyz_file):
913+
l = LabeledSystem(data=info_dict)
914+
self.append(l)
915+
904916
def from_vasp_xml(self, file_name, begin = 0, step = 1) :
905917
self.data['atom_names'], \
906918
self.data['atom_types'], \

0 commit comments

Comments
 (0)