Skip to content

Commit 59047e4

Browse files
authored
Enhance the CP2K AIMD output parser to read atomic kinds (#235)
* Enhance the CP2K AIMD output parser to read atomic kinds
* Add log file
1 parent b23ec52 commit 59047e4

30 files changed

+99088
-3146
lines changed

dpdata/cp2k/output.py

Lines changed: 73 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,8 @@
22
import numpy as np
33
import re
44
from collections import OrderedDict
5+
6+
from scipy.constants.constants import R
57
from .cell import cell_to_low_triangle
68
from ..unit import EnergyConversion, LengthConversion, ForceConversion, PressureConversion
79

@@ -29,6 +31,8 @@ def __init__(self, log_file_name, xyz_file_name, restart=False):
2931
self.xyz_block_generator = self.get_xyz_block_generator()
3032
self.restart_flag = restart
3133
self.cell=None
34+
self.print_level=None
35+
self.atomic_kinds = None
3236

3337
if self.restart_flag:
3438
self.handle_single_log_frame(next(self.log_block_generator))
@@ -43,13 +47,14 @@ def __iter__(self):
4347
def __next__(self):
4448
info_dict = {}
4549
log_info_dict = self.handle_single_log_frame(next(self.log_block_generator))
50+
#print(log_info_dict)
4651
xyz_info_dict = self.handle_single_xyz_frame(next(self.xyz_block_generator))
47-
eq1 = [v1==v2 for v1,v2 in zip(log_info_dict['atom_numbs'], xyz_info_dict['atom_numbs'])]
48-
eq2 = [v1==v2 for v1,v2 in zip(log_info_dict['atom_names'], xyz_info_dict['atom_names'])]
49-
eq3 = [v1==v2 for v1,v2 in zip(log_info_dict['atom_types'], xyz_info_dict['atom_types'])]
50-
assert all(eq1), (log_info_dict,xyz_info_dict,'There may be errors in the file. If it is a restart task; use restart=True')
51-
assert all(eq2), (log_info_dict,xyz_info_dict,'There may be errors in the file. If it is a restart task; use restart=True')
52-
assert all(eq3), (log_info_dict,xyz_info_dict,'There may be errors in the file. If it is a restart task; use restart=True')
52+
#eq1 = [v1==v2 for v1,v2 in zip(log_info_dict['atom_numbs'], xyz_info_dict['atom_numbs'])]
53+
#eq2 = [v1==v2 for v1,v2 in zip(log_info_dict['atom_names'], xyz_info_dict['atom_names'])]
54+
#eq3 = [v1==v2 for v1,v2 in zip(log_info_dict['atom_types'], xyz_info_dict['atom_types'])]
55+
#assert all(eq1), (log_info_dict,xyz_info_dict,'There may be errors in the file. If it is a restart task; use restart=True')
56+
#assert all(eq2), (log_info_dict,xyz_info_dict,'There may be errors in the file. If it is a restart task; use restart=True')
57+
#assert all(eq3), (log_info_dict,xyz_info_dict,'There may be errors in the file. If it is a restart task; use restart=True')
5358
assert log_info_dict['energies']==xyz_info_dict['energies'], (log_info_dict['energies'], xyz_info_dict['energies'],'There may be errors in the file')
5459
info_dict.update(log_info_dict)
5560
info_dict.update(xyz_info_dict)
@@ -103,11 +108,18 @@ def handle_single_log_frame(self, lines):
103108
cell_angle_pattern = re.compile(r' INITIAL CELL ANGLS\[deg\]\s+=\s+(?P<alpha>\S+)\s+(?P<beta>\S+)\s+(?P<gamma>\S+)')
104109
cell_A, cell_B, cell_C = (0,0,0,)
105110
cell_alpha, cell_beta, cell_gamma=(0,0,0,)
111+
cell_a_pattern = re.compile(r' CELL\| Vector a \[angstrom\]:\s+(?P<ax>\S+)\s+(?P<ay>\S+)\s+(?P<az>\S+)')
112+
cell_b_pattern = re.compile(r' CELL\| Vector b \[angstrom\]:\s+(?P<bx>\S+)\s+(?P<by>\S+)\s+(?P<bz>\S+)')
113+
cell_c_pattern = re.compile(r' CELL\| Vector c \[angstrom\]:\s+(?P<cx>\S+)\s+(?P<cy>\S+)\s+(?P<cz>\S+)')
106114
force_start_pattern = re.compile(r' ATOMIC FORCES in')
107115
force_flag=False
108116
force_end_pattern = re.compile(r' SUM OF ATOMIC FORCES')
109117
force_lines= []
110118
cell_flag=0
119+
print_level_pattern = re.compile(r' GLOBAL\| Global print level\s+(?P<print_level>\S+)')
120+
print_level_flag = 0
121+
atomic_kinds_pattern = re.compile(r'\s+\d+\. Atomic kind:\s+(?P<akind>\S+)')
122+
atomic_kinds = []
111123
for line in lines:
112124
if force_start_pattern.match(line):
113125
force_flag=True
@@ -131,9 +143,47 @@ def handle_single_log_frame(self, lines):
131143
cell_beta = np.deg2rad(float(cell_angle_pattern.match(line).groupdict()['beta']))
132144
cell_gamma = np.deg2rad(float(cell_angle_pattern.match(line).groupdict()['gamma']))
133145
cell_flag+=1
146+
if print_level_pattern.match(line):
147+
print_level = print_level_pattern.match(line).groupdict()['print_level']
148+
print_level_flag += 1
149+
if cell_a_pattern.match(line):
150+
cell_ax = float(cell_a_pattern.match(line).groupdict()['ax'])
151+
cell_ay = float(cell_a_pattern.match(line).groupdict()['ay'])
152+
cell_az = float(cell_a_pattern.match(line).groupdict()['az'])
153+
cell_flag+=1
154+
if cell_b_pattern.match(line):
155+
cell_bx = float(cell_b_pattern.match(line).groupdict()['bx'])
156+
cell_by = float(cell_b_pattern.match(line).groupdict()['by'])
157+
cell_bz = float(cell_b_pattern.match(line).groupdict()['bz'])
158+
cell_flag+=1
159+
if cell_c_pattern.match(line):
160+
cell_cx = float(cell_c_pattern.match(line).groupdict()['cx'])
161+
cell_cy = float(cell_c_pattern.match(line).groupdict()['cy'])
162+
cell_cz = float(cell_c_pattern.match(line).groupdict()['cz'])
163+
cell_flag+=1
164+
165+
if atomic_kinds_pattern.match(line):
166+
akind = atomic_kinds_pattern.match(line).groupdict()['akind']
167+
atomic_kinds.append(akind)
168+
if print_level_flag == 1:
169+
self.print_level = print_level
170+
if print_level == 'LOW':
171+
raise RuntimeError("please provide cp2k output with higher print level(at least MEDIUM)")
172+
173+
134174
if cell_flag == 2:
135175
self.cell = cell_to_low_triangle(cell_A,cell_B,cell_C,
136176
cell_alpha,cell_beta,cell_gamma)
177+
elif cell_flag == 5:
178+
self.cell = np.asarray(
179+
[
180+
[cell_ax, cell_ay, cell_az],
181+
[cell_bx, cell_by, cell_bz],
182+
[cell_cx, cell_cy, cell_cz]]
183+
).astype('float32')
184+
if atomic_kinds:
185+
self.atomic_kinds = atomic_kinds
186+
#print(self.atomic_kinds)
137187
# lx = cell_A
138188
# xy = cell_B * np.cos(cell_gamma)
139189
# xz = cell_C * np.cos(cell_beta)
@@ -146,27 +196,32 @@ def handle_single_log_frame(self, lines):
146196

147197
element_index = -1
148198
element_dict = OrderedDict()
149-
atom_types_list = []
199+
atom_types_idx_list = []
150200
forces_list = []
151201
for line in force_lines[3:]:
152202
line_list = line.split()
153-
if element_dict.get(line_list[2]):
154-
element_dict[line_list[2]][1]+=1
203+
#print(line_list)
204+
if element_dict.get(line_list[1]):
205+
element_dict[line_list[1]][1]+=1
155206
else:
156207
element_index +=1
157-
element_dict[line_list[2]]=[element_index,1]
158-
atom_types_list.append(element_dict[line_list[2]][0])
208+
element_dict[line_list[1]]=[element_index,1]
209+
atom_types_idx_list.append(element_dict[line_list[1]][0])
159210
forces_list.append([float(line_list[3])*AU_TO_EV_EVERY_ANG,
160211
float(line_list[4])*AU_TO_EV_EVERY_ANG,
161212
float(line_list[5])*AU_TO_EV_EVERY_ANG])
162-
163-
atom_names=list(element_dict.keys())
213+
#print(atom_types_idx_list)
214+
#atom_names=list(element_dict.keys())
215+
atom_names=self.atomic_kinds
164216
atom_numbs=[]
165-
for ii in atom_names:
217+
218+
for ii in element_dict.keys():
166219
atom_numbs.append(element_dict[ii][1])
220+
#print(atom_numbs)
167221
info_dict['atom_names'] = atom_names
168222
info_dict['atom_numbs'] = atom_numbs
169-
info_dict['atom_types'] = np.asarray(atom_types_list)
223+
info_dict['atom_types'] = np.asarray(atom_types_idx_list)
224+
info_dict['print_level'] = self.print_level
170225
info_dict['cells'] = np.asarray([self.cell]).astype('float32')
171226
info_dict['energies'] = np.asarray([energy]).astype('float32')
172227
info_dict['forces'] = np.asarray([forces_list]).astype('float32')
@@ -208,9 +263,9 @@ def handle_single_xyz_frame(self, lines):
208263
atom_numbs=[]
209264
for ii in atom_names:
210265
atom_numbs.append(element_dict[ii][1])
211-
info_dict['atom_names'] = atom_names
212-
info_dict['atom_numbs'] = atom_numbs
213-
info_dict['atom_types'] = np.asarray(atom_types_list)
266+
#info_dict['atom_names'] = atom_names
267+
#info_dict['atom_numbs'] = atom_numbs
268+
#info_dict['atom_types'] = np.asarray(atom_types_list)
214269
info_dict['coords'] = np.asarray([coords_list]).astype('float32')
215270
info_dict['energies'] = np.array([energy]).astype('float32')
216271
info_dict['orig']=[0,0,0]

tests/cp2k/aimd/DPGEN-1.ener

Lines changed: 31 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,31 @@
1+
# Step Nr. Time[fs] Kin.[a.u.] Temp[K] Pot.[a.u.] Cons Qty[a.u.] UsedTime[s]
2+
1 0.000000 0.000000000 0.000000000 -12660.628917195 0.000000000 936.038164850
3+
2 0.000000 0.000000000 0.000000000 -12660.653046555 0.000000000 274.698523818
4+
3 0.000000 0.000000000 0.000000000 -12660.715916301 0.000000000 344.194269081
5+
4 0.000000 0.000000000 0.000000000 -12660.683902467 0.000000000 365.777609413
6+
5 0.000000 0.000000000 0.000000000 -12660.717204918 0.000000000 355.401395731
7+
6 0.000000 0.000000000 0.000000000 -12660.733526044 0.000000000 441.982779772
8+
7 0.000000 0.000000000 0.000000000 -12660.692762118 0.000000000 399.540830899
9+
8 0.000000 0.000000000 0.000000000 -12660.693929365 0.000000000 441.216501789
10+
9 0.000000 0.000000000 0.000000000 -12660.651822948 0.000000000 397.996012313
11+
10 0.000000 0.000000000 0.000000000 -12660.695752782 0.000000000 387.826853781
12+
11 0.000000 0.000000000 0.000000000 -12660.648232319 0.000000000 420.456175307
13+
12 0.000000 0.000000000 0.000000000 -12660.607125395 0.000000000 546.109416880
14+
13 0.000000 0.000000000 0.000000000 -12660.628884156 0.000000000 419.908203241
15+
14 0.000000 0.000000000 0.000000000 -12660.608694793 0.000000000 511.207648837
16+
15 0.000000 0.000000000 0.000000000 -12660.632201776 0.000000000 419.362851471
17+
16 0.000000 0.000000000 0.000000000 -12660.622454799 0.000000000 430.431832543
18+
17 0.000000 0.000000000 0.000000000 -12660.618601776 0.000000000 441.716595725
19+
18 0.000000 0.000000000 0.000000000 -12660.670388010 0.000000000 365.645702597
20+
19 0.000000 0.000000000 0.000000000 -12660.647822007 0.000000000 408.940343004
21+
20 0.000000 0.000000000 0.000000000 -12660.656801404 0.000000000 451.677564803
22+
21 0.000000 0.000000000 0.000000000 -12660.658641834 0.000000000 623.018366899
23+
22 0.000000 0.000000000 0.000000000 -12660.693126694 0.000000000 462.518734109
24+
23 0.000000 0.000000000 0.000000000 -12660.678342029 0.000000000 355.707985190
25+
24 0.000000 0.000000000 0.000000000 -12660.652483744 0.000000000 398.295128799
26+
25 0.000000 0.000000000 0.000000000 -12660.631736112 0.000000000 442.329493437
27+
26 0.000000 0.000000000 0.000000000 -12660.641113462 0.000000000 397.965252467
28+
27 0.000000 0.000000000 0.000000000 -12660.713753654 0.000000000 441.802233378
29+
28 0.000000 0.000000000 0.000000000 -12660.724079194 0.000000000 397.543293045
30+
29 0.000000 0.000000000 0.000000000 -12660.705785856 0.000000000 355.694321892
31+
30 0.000000 0.000000000 0.000000000 -12660.703546464 0.000000000 420.815910676

0 commit comments

Comments
 (0)