Skip to content

Commit b0cb213

Browse files
authored
Merge pull request #110 from amcadmus/master
merge devel into master
2 parents 6306863 + 47c29d2 commit b0cb213

37 files changed

+9495
-51
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,4 @@ dist
2020
dpdata.egg-info
2121
_version.py
2222
!tests/cp2k/aimd/cp2k.log
23+
__pycache__

README.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,8 @@ The `System` or `LabeledSystem` can be constructed from the following file forma
7171
| QE | log | False | True | LabeledSystem | 'qe/pw/scf' |
7272
| QE | log | True | False | System | 'qe/cp/traj' |
7373
| QE | log | True | True | LabeledSystem | 'qe/cp/traj' |
74+
| Fhi-aims| output | True | True | LabeledSystem | 'fhi_aims/md' |
75+
| Fhi-aims| output | False | True | LabeledSystem | 'fhi_aims/scf' |
7476
|quip/gap|xyz|True|True|MultiSystems|'quip/gap/xyz'|
7577
| PWmat | atom.config | False | False | System | 'pwmat/atom.config' |
7678
| PWmat | movement | True | True | LabeledSystem | 'pwmat/movement' |
@@ -112,6 +114,27 @@ xyz_multi_systems.systems['B1C9'].to_deepmd_raw('./my_work_dir/B1C9_raw')
112114
xyz_multi_systems.to_deepmd_raw('./my_deepmd_data/')
113115
```
114116

117+
You may also use the following code to parse multiple systems:
118+
```
119+
from dpdata import LabeledSystem,MultiSystems
120+
from glob import glob
121+
"""
122+
process multi systems
123+
"""
124+
fs=glob('./*/OUTCAR') # remember to change here !!!
125+
ms=MultiSystems()
126+
for f in fs:
127+
try:
128+
ls=LabeledSystem(f)
129+
except:
130+
print(f)
131+
if len(ls)>0:
132+
ms.append(ls)
133+
134+
ms.to_deepmd_raw('deepmd')
135+
ms.to_deepmd_npy('deepmd')
136+
```
137+
115138
## Access data
116139
These properties stored in `System` and `LabeledSystem` can be accessed by operator `[]` with the key of the property supplied, for example
117140
```python

dpdata/cp2k/output.py

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020

2121
class Cp2kSystems(object):
2222
"""
23-
deal with cp2k outputfile
23+
deal with cp2k outputfile
2424
"""
2525
def __init__(self, log_file_name, xyz_file_name):
2626
self.log_file_object = open(log_file_name, 'r')
@@ -69,7 +69,7 @@ def get_log_block_generator(self):
6969
break
7070
if delimiter_flag is True:
7171
raise RuntimeError('This file lacks some content, please check')
72-
72+
7373
def get_xyz_block_generator(self):
7474
p3 = re.compile(r'^\s*(\d+)\s*')
7575
while True:
@@ -116,7 +116,7 @@ def handle_single_log_frame(self, lines):
116116
if cell_length_pattern.match(line):
117117
cell_A = float(cell_length_pattern.match(line).groupdict()['A']) * AU_TO_ANG
118118
cell_B = float(cell_length_pattern.match(line).groupdict()['B']) * AU_TO_ANG
119-
cell_C = float(cell_length_pattern.match(line).groupdict()['C']) * AU_TO_ANG
119+
cell_C = float(cell_length_pattern.match(line).groupdict()['C']) * AU_TO_ANG
120120
cell_flag+=1
121121
if cell_angle_pattern.match(line):
122122
cell_alpha = np.deg2rad(float(cell_angle_pattern.match(line).groupdict()['alpha']))
@@ -148,10 +148,10 @@ def handle_single_log_frame(self, lines):
148148
element_index +=1
149149
element_dict[line_list[2]]=[element_index,1]
150150
atom_types_list.append(element_dict[line_list[2]][0])
151-
forces_list.append([float(line_list[3])*AU_TO_EV_EVERY_ANG,
152-
float(line_list[4])*AU_TO_EV_EVERY_ANG,
151+
forces_list.append([float(line_list[3])*AU_TO_EV_EVERY_ANG,
152+
float(line_list[4])*AU_TO_EV_EVERY_ANG,
153153
float(line_list[5])*AU_TO_EV_EVERY_ANG])
154-
154+
155155
atom_names=list(element_dict.keys())
156156
atom_numbs=[]
157157
for ii in atom_names:
@@ -190,8 +190,8 @@ def handle_single_xyz_frame(self, lines):
190190
element_index +=1
191191
element_dict[line_list[0]]=[element_index,1]
192192
atom_types_list.append(element_dict[line_list[0]][0])
193-
coords_list.append([float(line_list[1])*AU_TO_ANG,
194-
float(line_list[2])*AU_TO_ANG,
193+
coords_list.append([float(line_list[1])*AU_TO_ANG,
194+
float(line_list[2])*AU_TO_ANG,
195195
float(line_list[3])*AU_TO_ANG])
196196
atom_names=list(element_dict.keys())
197197
atom_numbs=[]
@@ -203,29 +203,30 @@ def handle_single_xyz_frame(self, lines):
203203
info_dict['coords'] = np.asarray([coords_list]).astype('float32')
204204
info_dict['energies'] = np.array([energy]).astype('float32')
205205
info_dict['orig']=[0,0,0]
206-
return info_dict
206+
return info_dict
207207

208208
#%%
209209

210210
def get_frames (fname) :
211211
coord_flag = False
212212
force_flag = False
213213
eV = 2.72113838565563E+01 # hatree to eV
214-
angstrom = 5.29177208590000E-01 # Bohrto Angstrom
214+
angstrom = 5.29177208590000E-01 # Bohrto Angstrom
215215
fp = open(fname)
216216
atom_symbol_list = []
217217
cell = []
218218
coord = []
219219
force = []
220-
220+
coord_count = 0
221221
for idx, ii in enumerate(fp) :
222222
if 'CELL| Vector' in ii :
223223
cell.append(ii.split()[4:7])
224224
if 'Atom Kind Element' in ii :
225225
coord_flag = True
226226
coord_idx = idx
227+
coord_count += 1
227228
# get the coord block info
228-
if coord_flag :
229+
if coord_flag and (coord_count == 1):
229230
if (idx > coord_idx + 1) :
230231
if (ii == '\n') :
231232
coord_flag = False

dpdata/deepmd/comp.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def to_system_data(folder,
2222
# data is empty
2323
data = load_type(folder, type_map = type_map)
2424
data['orig'] = np.zeros([3])
25-
sets = glob.glob(os.path.join(folder, 'set.*'))
25+
sets = sorted(glob.glob(os.path.join(folder, 'set.*')))
2626
all_cells = []
2727
all_coords = []
2828
all_eners = []
@@ -36,11 +36,11 @@ def to_system_data(folder,
3636
if eners is not None:
3737
eners = np.reshape(eners, [nframes])
3838
if labels:
39-
if eners is not None and len(eners) > 0:
39+
if eners is not None and eners.size > 0:
4040
all_eners.append(np.reshape(eners, [nframes]))
41-
if forces is not None and len(forces) > 0:
41+
if forces is not None and forces.size > 0:
4242
all_forces.append(np.reshape(forces, [nframes,-1,3]))
43-
if virs is not None and len(virs) > 0:
43+
if virs is not None and virs.size > 0:
4444
all_virs.append(np.reshape(virs, [nframes,3,3]))
4545
data['cells'] = np.concatenate(all_cells, axis = 0)
4646
data['coords'] = np.concatenate(all_coords, axis = 0)
@@ -61,7 +61,7 @@ def dump(folder,
6161
comp_prec = np.float32,
6262
remove_sets = True) :
6363
os.makedirs(folder, exist_ok = True)
64-
sets = glob.glob(os.path.join(folder, 'set.*'))
64+
sets = sorted(glob.glob(os.path.join(folder, 'set.*')))
6565
if len(sets) > 0:
6666
if remove_sets :
6767
for ii in sets :

dpdata/fhi_aims/output.py

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
eng_patt="Total energy uncorrected.*([-]?[0-9]{1,}[.][0-9]*[E][+-][0-9]{1,})\s+eV"
99
#atom_numb_patt="Number of atoms.*([0-9]{1,})"
1010

11+
debug = False
1112
def get_info (lines, type_idx_zero = False) :
1213

1314
atom_types = []
@@ -31,26 +32,23 @@ def get_info (lines, type_idx_zero = False) :
3132
v_str=ii.split('|')[1].split()
3233
vect=[float(kk) for kk in v_str]
3334
cell.append(vect)
34-
# print(cell)
35-
#atom name
35+
3636
_tmp=re.findall(pos_patt_first,contents)
3737
for ii in _tmp:
3838
_atom_names.append(ii[0])
3939
atom_names=[]
4040
for ii in _atom_names:
4141
if not ii in atom_names:
4242
atom_names.append(ii)
43-
#atom number
44-
#_atom_numb_patt=re.compile(atom_numb_patt)
43+
4544
atom_numbs =[_atom_names.count(ii) for ii in atom_names]
45+
if type_idx_zero :
46+
type_map=dict(zip(atom_names,range(len(atom_names))))
47+
else:
48+
type_map=dict(zip(atom_names,range(1,len(atom_names)+1)))
49+
atom_types=list(map(lambda k: type_map[k], _atom_names))
4650
assert(atom_numbs is not None), "cannot find ion type info in aims output"
47-
48-
for idx,ii in enumerate(atom_numbs) :
49-
for jj in range(ii) :
50-
if type_idx_zero :
51-
atom_types.append(idx)
52-
else :
53-
atom_types.append(idx+1)
51+
5452

5553
return [cell, atom_numbs, atom_names, atom_types ]
5654

@@ -81,8 +79,9 @@ def get_frames (fname, md=True, begin = 0, step = 1) :
8179

8280
cc = 0
8381
while len(blk) > 0 :
84-
# with open(str(cc),'w') as f:
85-
# f.write('\n'.join(blk))
82+
if debug:
83+
with open(str(cc),'w') as f:
84+
f.write('\n'.join(blk))
8685
if cc >= begin and (cc - begin) % step == 0 :
8786
if cc==0:
8887
coord, _cell, energy, force, virial, is_converge = analyze_block(blk, first_blk=True, md=md)

dpdata/lammps/dump.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,9 +83,9 @@ def get_posi_frac(lines) :
8383
posis = np.array(posis)
8484
return posis[:,1:4]
8585

86-
def safe_get_posi(lines, cell):
86+
def safe_get_posi(lines, cell, orig = np.zeros(3)):
8787
try:
88-
posis = get_posi(lines)
88+
posis = get_posi(lines) - orig
8989
except ValueError:
9090
fposis = get_posi_frac(lines)
9191
posis = fposis @ cell
@@ -173,12 +173,12 @@ def system_data(lines, type_map = None, type_idx_zero = True) :
173173
system['cells'] = [np.array(cell)]
174174
natoms = sum(system['atom_numbs'])
175175
system['atom_types'] = get_atype(lines, type_idx_zero = type_idx_zero)
176-
system['coords'] = [safe_get_posi(lines, cell) - np.array(orig)]
176+
system['coords'] = [safe_get_posi(lines, cell, np.array(orig))]
177177
for ii in range(1, len(array_lines)) :
178178
bounds, tilt = get_dumpbox(array_lines[ii])
179179
orig, cell = dumpbox2box(bounds, tilt)
180180
system['cells'].append(cell)
181-
system['coords'].append(safe_get_posi(array_lines[ii], cell) - np.array(orig))
181+
system['coords'].append(safe_get_posi(array_lines[ii], cell, np.array(orig)))
182182
system['cells'] = np.array(system['cells'])
183183
system['coords'] = np.array(system['coords'])
184184
return system

0 commit comments

Comments
 (0)