Skip to content

Commit bb14c5a

Browse files
wanghan-iapcmHan Wang
andauthored
fix: issue 822 and 836 (#844)
- Fix #822: NWRITE = 0 can now be handled.
- Fix #836: handle the case where the key TITEL is missing.

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->

## Summary by CodeRabbit

* **New Features**
  * Improved extraction of atom names from VASP output files, enhancing compatibility with VASP version 6 files.
  * Added support for reading the NWRITE parameter from OUTCAR files.
  * Enhanced validation to ensure coordinates, cell, and forces are present in parsed frames.
* **Bug Fixes**
  * Improved handling of OUTCAR files lacking TITEL lines for atom names.
* **Tests**
  * Added new tests to verify correct frame extraction and atom name parsing from both OUTCAR and XML files, including cases with NWRITE=0 and VASP 6 output formats.
* **Chores**
  * Included new sample OUTCAR and vasprun.xml files for expanded test coverage.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Co-authored-by: Han Wang <[email protected]>
1 parent b94b0a7 commit bb14c5a

File tree

7 files changed

+11006
-17
lines changed

7 files changed

+11006
-17
lines changed

dpdata/vasp/outcar.py

Lines changed: 91 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,31 +6,93 @@
66
import numpy as np
77

88

9-
def system_info(lines, type_idx_zero=False):
9+
def atom_name_from_potcar_string(instr: str) -> str:
    """Get atom name from a POTCAR element name.

    The text before the first underscore is taken as the atom name,
    e.g. ``Sn_d`` -> ``Sn``. A name without an underscore is returned
    unchanged.

    Parameters
    ----------
    instr : str
        input POTCAR element name

    Returns
    -------
    name : str
        name of the atom
    """
    # for case like : TITEL = PAW_PBE Sn_d 06Sep2000
    # str.partition returns the whole string as the head when "_" is absent,
    # so the with/without-underscore cases need no separate branch.
    return instr.partition("_")[0]
29+
30+
31+
def system_info(
32+
lines: list[str],
33+
type_idx_zero: bool = False,
34+
) -> tuple[list[str], list[int], np.ndarray, int | None, int | None]:
35+
"""Get system information from lines of an OUTCAR file.
36+
37+
Parameters
38+
----------
39+
lines : list[str]
40+
the lines of the OUTCAR file
41+
type_idx_zero : bool
42+
if true atom types starts from 0 otherwise from 1.
43+
44+
Returns
45+
-------
46+
atom_names: list[str]
47+
name of atoms
48+
atom_numbs: list[int]
49+
number of atoms that have a certain name. same length as atom_names
50+
atom_types: np.ndarray
51+
type of each atom, the array has same lenght as number of atoms
52+
nelm: optional[int]
53+
the value of NELM parameter
54+
nwrite: optional[int]
55+
the value of NWRITE parameter
56+
"""
1057
atom_names = []
58+
atom_names_potcar = []
1159
atom_numbs = None
1260
nelm = None
61+
nwrite = None
1362
for ii in lines:
14-
ii_word_list = ii.split()
1563
if "TITEL" in ii:
1664
# get atom names from POTCAR info, tested only for PAW_PBE ...
65+
# for case like : TITEL = PAW_PBE Sn_d 06Sep2000
1766
_ii = ii.split()[3]
18-
if "_" in _ii:
19-
# for case like : TITEL = PAW_PBE Sn_d 06Sep2000
20-
atom_names.append(_ii.split("_")[0])
21-
else:
22-
atom_names.append(_ii)
67+
atom_names.append(atom_name_from_potcar_string(_ii))
68+
elif "POTCAR:" in ii:
69+
# get atom names from POTCAR info, tested only for PAW_PBE ...
70+
# for case like : POTCAR: PAW_PBE Ti 08Apr2002
71+
_ii = ii.split()[2]
72+
atom_names_potcar.append(atom_name_from_potcar_string(_ii))
2373
# a stricker check for "NELM"; compatible with distingct formats in different versions(6 and older, newers_expect-to-work) of vasp
2474
elif nelm is None:
2575
m = re.search(r"NELM\s*=\s*(\d+)", ii)
2676
if m:
2777
nelm = int(m.group(1))
78+
elif nwrite is None:
79+
m = re.search(r"NWRITE\s*=\s*(\d+)", ii)
80+
if m:
81+
nwrite = int(m.group(1))
2882
if "ions per type" in ii:
2983
atom_numbs_ = [int(s) for s in ii.split()[4:]]
3084
if atom_numbs is None:
3185
atom_numbs = atom_numbs_
3286
else:
3387
assert atom_numbs == atom_numbs_, "in consistent numb atoms in OUTCAR"
88+
if len(atom_names) == 0:
89+
# try to use atom_names_potcar
90+
if len(atom_names_potcar) == 0:
91+
raise ValueError("cannot get atom names from potcar")
92+
nnames = len(atom_names_potcar)
93+
# the names are repeated. check if it is the case
94+
assert atom_names_potcar[: nnames // 2] == atom_names_potcar[nnames // 2 :]
95+
atom_names = atom_names_potcar[: nnames // 2]
3496
assert nelm is not None, "cannot find maximum steps for each SC iteration"
3597
assert atom_numbs is not None, "cannot find ion type info in OUTCAR"
3698
atom_names = atom_names[: len(atom_numbs)]
@@ -41,7 +103,7 @@ def system_info(lines, type_idx_zero=False):
41103
atom_types.append(idx)
42104
else:
43105
atom_types.append(idx + 1)
44-
return atom_names, atom_numbs, np.array(atom_types, dtype=int), nelm
106+
return atom_names, atom_numbs, np.array(atom_types, dtype=int), nelm, nwrite
45107

46108

47109
def get_outcar_block(fp, ml=False):
@@ -57,12 +119,24 @@ def get_outcar_block(fp, ml=False):
57119
return blk
58120

59121

122+
def check_outputs(coord, cell, force) -> bool:
    """Check that forces, coordinates and cell are all non-empty.

    Parameters
    ----------
    coord : sized
        coordinates to check
    cell : sized
        cell to check
    force : sized
        forces to check

    Returns
    -------
    bool
        always True; a missing item is reported by raising instead

    Raises
    ------
    ValueError
        if force, coord or cell has length 0 (checked in that order)
    """
    # The order (forces, coordinates, cell) decides which error is reported
    # when several items are empty at once.
    required = (
        ("forces", force),
        ("coordinates", coord),
        ("cell", cell),
    )
    for label, value in required:
        # len() is used instead of truthiness so that any sized container
        # is accepted without ambiguity.
        if len(value) == 0:
            raise ValueError(f"cannot find {label} in OUTCAR block")
    return True
130+
131+
60132
# we assume that the force is printed ...
61133
def get_frames(fname, begin=0, step=1, ml=False, convergence_check=True):
62134
fp = open(fname)
63135
blk = get_outcar_block(fp)
64136

65-
atom_names, atom_numbs, atom_types, nelm = system_info(blk, type_idx_zero=True)
137+
atom_names, atom_numbs, atom_types, nelm, nwrite = system_info(
138+
blk, type_idx_zero=True
139+
)
66140
ntot = sum(atom_numbs)
67141

68142
all_coords = []
@@ -78,9 +152,15 @@ def get_frames(fname, begin=0, step=1, ml=False, convergence_check=True):
78152
coord, cell, energy, force, virial, is_converge = analyze_block(
79153
blk, ntot, nelm, ml
80154
)
81-
if len(coord) == 0:
155+
if energy is None:
82156
break
83-
if is_converge or not convergence_check:
157+
if nwrite == 0:
158+
has_label = len(force) > 0 and len(coord) > 0 and len(cell) > 0
159+
if not has_label:
160+
warnings.warn("cannot find labels in the frame, ingore")
161+
else:
162+
has_label = check_outputs(coord, cell, force)
163+
if (is_converge or not convergence_check) and has_label:
84164
all_coords.append(coord)
85165
all_cells.append(cell)
86166
all_energies.append(energy)
@@ -144,12 +224,6 @@ def analyze_block(lines, ntot, nelm, ml=False):
144224
is_converge = False
145225
elif energy_token[ml_index] in ii:
146226
energy = float(ii.split()[energy_index[ml_index]])
147-
if len(force) == 0:
148-
raise ValueError("cannot find forces in OUTCAR block")
149-
if len(coord) == 0:
150-
raise ValueError("cannot find coordinates in OUTCAR block")
151-
if len(cell) == 0:
152-
raise ValueError("cannot find cell in OUTCAR block")
153227
return coord, cell, energy, force, virial, is_converge
154228
elif cell_token[ml_index] in ii:
155229
for dd in range(3):

0 commit comments

Comments
 (0)