|
| 1 | +#%% |
1 | 2 | import numpy as np
|
| 3 | +import re |
| 4 | +from collections import OrderedDict |
| 5 | +from .cell import cell_to_low_triangle |
2 | 6 |
|
| 7 | +#%% |
# Unit conversion factors applied to values parsed from CP2K output.
AU_TO_ANG = 5.29177208590000E-01  # bohr -> angstrom
AU_TO_EV = 2.72113838565563E+01  # hartree -> eV
AU_TO_EV_EVERY_ANG = AU_TO_EV/AU_TO_ANG  # hartree/bohr -> eV/angstrom

# Lines matching any of these patterns separate blocks of the log file.
delimiter_p1 = re.compile(r'^ \* GO CP2K GO! \*+')
delimiter_p2 = re.compile(r'^ \*+')
delimiter_patterns = [delimiter_p1, delimiter_p2]

# A block is only kept when the line right after its opening delimiter
# matches one of these patterns.
avail_patterns = [
    re.compile(r'^ INITIAL POTENTIAL ENERGY'),
    re.compile(r'^ ENSEMBLE TYPE'),
]
| 20 | + |
class Cp2kSystems(object):
    """
    Iterate over the frames of a CP2K run described by a log file and an
    xyz file.

    Each iteration parses one block of the log file and one frame of the
    xyz file, checks that both describe the same atoms and energy, and
    returns the merged information as a dict.
    """

    def __init__(self, log_file_name, xyz_file_name):
        """
        Open both files and prepare the lazy block generators.

        Parameters
        ----------
        log_file_name : str
            Path of the CP2K log/output file.
        xyz_file_name : str
            Path of the xyz file.
        """
        # Declare the handles first so that __del__ stays safe even when
        # one of the open() calls below raises.
        self.log_file_object = None
        self.xyz_file_object = None
        self.cell = None
        self.log_file_object = open(log_file_name, 'r')
        self.xyz_file_object = open(xyz_file_name, 'r')
        self.log_block_generator = self.get_log_block_generator()
        self.xyz_block_generator = self.get_xyz_block_generator()

    def __del__(self):
        """Close whichever file handles were successfully opened."""
        for attr_name in ('log_file_object', 'xyz_file_object'):
            handle = getattr(self, attr_name, None)
            if handle is not None:
                handle.close()

    def __iter__(self):
        return self

    def __next__(self):
        """
        Parse and return the next frame.

        Returns
        -------
        dict
            The log-frame dict updated with the xyz-frame dict (xyz values
            win for keys present in both).

        Raises
        ------
        StopIteration
            Propagated from the underlying generators when either file is
            exhausted.
        AssertionError
            If the atom counts, names, types or energies of the two
            sources disagree.
        """
        log_info_dict = self.handle_single_log_frame(next(self.log_block_generator))
        xyz_info_dict = self.handle_single_xyz_frame(next(self.xyz_block_generator))

        # Whole-sequence comparison: zip() would silently truncate and let
        # frames with a different number of atoms/elements pass.
        for key in ('atom_numbs', 'atom_names', 'atom_types'):
            if not np.array_equal(log_info_dict[key], xyz_info_dict[key]):
                raise AssertionError(
                    (log_info_dict, xyz_info_dict, 'There may be errors in the file'))

        # Both energies are parsed independently from two files and rounded
        # to float32, so compare with a tight tolerance instead of ==.
        if not np.allclose(log_info_dict['energies'], xyz_info_dict['energies'],
                           rtol=1.0e-6, atol=1.0e-6):
            raise AssertionError(
                (log_info_dict['energies'], xyz_info_dict['energies'],
                 'There may be errors in the file'))

        info_dict = {}
        info_dict.update(log_info_dict)
        info_dict.update(xyz_info_dict)
        return info_dict

    def get_log_block_generator(self):
        """
        Yield lists of log lines, one list per usable block.

        Lines are accumulated until a delimiter line (see
        ``delimiter_patterns``) is found. If the line following a delimiter
        matches one of ``avail_patterns`` the block is marked usable and is
        yielded when the next delimiter line is reached.

        Raises
        ------
        RuntimeError
            If the file ends while a usable block is still open.
        """
        lines = []
        delimiter_flag = False
        while True:
            line = self.log_file_object.readline()
            if not line:
                break
            lines.append(line)
            if not any(p.match(line) for p in delimiter_patterns):
                continue
            if delimiter_flag:
                # Closing delimiter of a usable block.
                yield lines
                lines = []
                delimiter_flag = False
            else:
                # Opening delimiter: peek at the next line to decide whether
                # this block carries data worth yielding.
                line = self.log_file_object.readline()
                lines.append(line)
                if any(p.match(line) for p in avail_patterns):
                    delimiter_flag = True
        if delimiter_flag:
            raise RuntimeError('This file lacks some content, please check')

    def get_xyz_block_generator(self):
        """
        Yield one list of lines per xyz frame.

        A frame starts at a line beginning with an integer atom count and
        spans that line plus the following ``atom_num + 1`` lines. Lines
        that do not start a frame (e.g. blank lines) are skipped.

        Raises
        ------
        RuntimeError
            If the file ends before the frame is complete.
        """
        p3 = re.compile(r'^\s*(\d+)\s*')
        while True:
            line = self.xyz_file_object.readline()
            if not line:
                break
            header = p3.match(line)
            if header is None:
                # Not a frame header: nothing to yield for this line.
                continue
            atom_num = int(header.group(1))
            lines = [line]
            for _ in range(atom_num + 1):
                lines.append(self.xyz_file_object.readline())
            if not lines[-1]:
                raise RuntimeError("this xyz file may lack of lines, should be {};lines:{}".format(atom_num+2, lines))
            yield lines

    @staticmethod
    def _register_element(element_dict, symbol):
        """Count one more atom of ``symbol`` and return its type index.

        ``element_dict`` maps symbol -> [type_index, atom_count]; type
        indices are assigned in order of first appearance.
        """
        entry = element_dict.get(symbol)
        if entry is None:
            entry = [len(element_dict), 0]
            element_dict[symbol] = entry
        entry[1] += 1
        return entry[0]

    def handle_single_log_frame(self, lines):
        """
        Extract energy, cell and forces from one block of log lines.

        Parameters
        ----------
        lines : list of str
            One block as produced by ``get_log_block_generator``.

        Returns
        -------
        dict
            Keys ``atom_names``, ``atom_numbs``, ``atom_types``, ``cells``,
            ``energies`` and ``forces``. Energies are scaled by
            ``AU_TO_EV``, forces by ``AU_TO_EV_EVERY_ANG``.

        Raises
        ------
        AssertionError
            If the end of a force section is found without its start.
        """
        info_dict = {}
        energy_pattern_1 = re.compile(r' INITIAL POTENTIAL ENERGY\[hartree\]\s+=\s+(?P<number>\S+)')
        # CONSERVED QUANTITY [hartree] = -0.279168013085E+04
        energy_pattern_2 = re.compile(r' POTENTIAL ENERGY\[hartree\]\s+=\s+(?P<number>\S+)')
        cell_length_pattern = re.compile(r' INITIAL CELL LNTHS\[bohr\]\s+=\s+(?P<A>\S+)\s+(?P<B>\S+)\s+(?P<C>\S+)')
        cell_angle_pattern = re.compile(r' INITIAL CELL ANGLS\[deg\]\s+=\s+(?P<alpha>\S+)\s+(?P<beta>\S+)\s+(?P<gamma>\S+)')
        force_start_pattern = re.compile(r' ATOMIC FORCES in')
        force_end_pattern = re.compile(r' SUM OF ATOMIC FORCES')

        energy = None
        cell_A, cell_B, cell_C = (0, 0, 0,)
        cell_alpha, cell_beta, cell_gamma = (0, 0, 0,)
        cell_flag = 0
        force_flag = False
        force_lines = []

        for line in lines:
            if force_start_pattern.match(line):
                force_flag = True
            if force_end_pattern.match(line):
                if not force_flag:
                    raise AssertionError((force_flag, 'there may be errors in this file '))
                force_flag = False
            if force_flag:
                force_lines.append(line)

            for pattern in (energy_pattern_1, energy_pattern_2):
                found = pattern.match(line)
                if found:
                    energy = float(found.group('number')) * AU_TO_EV

            cell_changed = False
            found = cell_length_pattern.match(line)
            if found:
                cell_A = float(found.group('A')) * AU_TO_ANG
                cell_B = float(found.group('B')) * AU_TO_ANG
                cell_C = float(found.group('C')) * AU_TO_ANG
                cell_flag += 1
                cell_changed = True
            found = cell_angle_pattern.match(line)
            if found:
                cell_alpha = np.deg2rad(float(found.group('alpha')))
                cell_beta = np.deg2rad(float(found.group('beta')))
                cell_gamma = np.deg2rad(float(found.group('gamma')))
                cell_flag += 1
                cell_changed = True
            # Build the cell once, as soon as both the lengths and the
            # angles have been read; it is kept on self for later frames.
            if cell_changed and cell_flag == 2:
                self.cell = cell_to_low_triangle(cell_A, cell_B, cell_C,
                                                 cell_alpha, cell_beta, cell_gamma)

        element_dict = OrderedDict()
        atom_types_list = []
        forces_list = []
        # The first three collected lines are the section title and table
        # header, not atoms.
        for line in force_lines[3:]:
            line_list = line.split()
            atom_types_list.append(self._register_element(element_dict, line_list[2]))
            forces_list.append([float(line_list[3])*AU_TO_EV_EVERY_ANG,
                                float(line_list[4])*AU_TO_EV_EVERY_ANG,
                                float(line_list[5])*AU_TO_EV_EVERY_ANG])

        info_dict['atom_names'] = list(element_dict.keys())
        info_dict['atom_numbs'] = [entry[1] for entry in element_dict.values()]
        info_dict['atom_types'] = np.asarray(atom_types_list)
        info_dict['cells'] = np.asarray([self.cell]).astype('float32')
        info_dict['energies'] = np.asarray([energy]).astype('float32')
        info_dict['forces'] = np.asarray([forces_list]).astype('float32')
        return info_dict

    def handle_single_xyz_frame(self, lines):
        """
        Extract coordinates and energy from one xyz frame.

        Parameters
        ----------
        lines : list of str
            One frame as produced by ``get_xyz_block_generator``: the atom
            count line, the comment line, then one line per atom.

        Returns
        -------
        dict
            Keys ``atom_names``, ``atom_numbs``, ``atom_types``,
            ``coords``, ``energies`` and ``orig``.

        Raises
        ------
        RuntimeError
            If the number of lines does not equal the atom count plus two.
        """
        info_dict = {}
        atom_num = int(lines[0].strip('\n').strip())
        if len(lines) != atom_num + 2:
            raise RuntimeError("format error, atom_num=={}, {}!=atom_num+2".format(atom_num, len(lines)))
        # A trailing blank guarantees the last "key = value" pair is
        # terminated for the pattern below.
        data_format_line = lines[1].strip('\n').strip()+str(' ')
        prop_pattern = re.compile(r'(?P<prop>\w+)\s*=\s*(?P<number>.*?)[, ]')
        prop_dict = dict(prop_pattern.findall(data_format_line))

        energy = 0
        if prop_dict.get('E'):
            energy = float(prop_dict.get('E')) * AU_TO_EV

        element_dict = OrderedDict()
        atom_types_list = []
        coords_list = []
        for line in lines[2:]:
            line_list = line.split()
            atom_types_list.append(self._register_element(element_dict, line_list[0]))
            # NOTE(review): coordinates are scaled by AU_TO_ANG, i.e. the xyz
            # file is assumed to be written in bohr - confirm against the
            # CP2K trajectory settings.
            coords_list.append([float(line_list[1])*AU_TO_ANG,
                                float(line_list[2])*AU_TO_ANG,
                                float(line_list[3])*AU_TO_ANG])

        info_dict['atom_names'] = list(element_dict.keys())
        info_dict['atom_numbs'] = [entry[1] for entry in element_dict.values()]
        info_dict['atom_types'] = np.asarray(atom_types_list)
        info_dict['coords'] = np.asarray([coords_list]).astype('float32')
        info_dict['energies'] = np.array([energy]).astype('float32')
        info_dict['orig'] = [0, 0, 0]
        return info_dict
| 207 | + |
| 208 | +#%% |
3 | 209 |
|
4 | 210 | def get_frames (fname) :
|
5 | 211 | coord_flag = False
|
6 | 212 | force_flag = False
|
7 |
| - eV = 2.72113838565563E+01 |
8 |
| - angstrom = 5.29177208590000E-01 |
| 213 | + eV = 2.72113838565563E+01 # hatree to eV |
| 214 | + angstrom = 5.29177208590000E-01 # Bohrto Angstrom |
9 | 215 | fp = open(fname)
|
10 | 216 | atom_symbol_list = []
|
11 | 217 | cell = []
|
@@ -74,3 +280,6 @@ def get_frames (fname) :
|
74 | 280 | return list(atom_names), atom_numbs, atom_types, cell, coord, energy, force
|
75 | 281 |
|
76 | 282 |
|
| 283 | + |
| 284 | + |
| 285 | +# %% |
0 commit comments