Skip to content

Commit 18eab9c

Browse files
committed
Parse only POTCAR metadata from OUTCAR when supplementing vasprun.xml
When vasprun.xml is the mainfile, OUTCAR now only supplements pseudopotential metadata (SHA256, LPAW, LULTRA, ENMAX, etc.) instead of parsing all data. This avoids redundant parsing, since vasprun.xml already contains eigenvalues, energies, and forces.

Changes:
- Create a minimal `outcar_supplement_parser` containing only the `pseudopotentials` quantity.
- Fix the POTCAR regex to require VRHFIN within the matched content, not just ahead of it: `POTCAR:([\s\S]+?VRHFIN[\s\S]+?)(?=\s*POTCAR:|\s*local pseudopotential:|\Z)`. This prevents matching header-only POTCAR lines that lack detailed metadata.
- Simplify `_find_outcar()` to use a generator expression with `next()`.
- Update comments to reflect the POTCAR-specific scope.

Fixes an issue where 4 pseudopotentials were parsed instead of 2 because the regex matched both header-only lines and detailed sections.
1 parent b059c6b commit 18eab9c

File tree

2 files changed

+32
-19
lines changed

2 files changed

+32
-19
lines changed

src/nomad_simulation_parsers/parsers/vasp/outcar_parser.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -336,7 +336,7 @@ def str_to_eigenvalues(val_in):
336336
),
337337
Quantity(
338338
'pseudopotentials',
339-
r'POTCAR:(?=[\s\S]*?VRHFIN)([\s\S]+?)(?=\s*$|\s*POTCAR:|\s*local pseudopotential:)',
339+
r'POTCAR:([\s\S]+?VRHFIN[\s\S]+?)(?=\s*POTCAR:|\s*local pseudopotential:|\Z)',
340340
repeats=True,
341341
sub_parser=TextParser(quantities=potcar_quantities),
342342
),

src/nomad_simulation_parsers/parsers/vasp/xml_parser.py

Lines changed: 31 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -92,23 +92,36 @@ def write_to_archive(self) -> None:
9292
data_parser.annotation_key = vasp.XML2_KEY
9393
xml_parser.convert(data_parser)
9494

95-
# Third pass: OUTCAR_KEY to extend with OUTCAR data if available
95+
# Third pass: OUTCAR_KEY to extend with OUTCAR pseudopotential metadata
9696
# This allows OUTCAR to supplement vasprun.xml pseudopotentials with
97-
# detailed metadata
97+
# detailed metadata (SHA256, LPAW, LULTRA, etc.)
9898
outcar_path = self._find_outcar()
9999
if outcar_path and os.path.exists(outcar_path):
100100
LOGGER.info(
101-
f'Found OUTCAR at {outcar_path}, extending vasprun.xml data '
102-
'with detailed pseudopotential metadata'
101+
f'Found OUTCAR at {outcar_path}, extending vasprun.xml pseudopotentials '
102+
'with detailed metadata'
103+
)
104+
from nomad.parsing.file_parser import Quantity, TextParser
105+
from nomad.parsing.file_parser.mapping_parser import (
106+
TextParser as MappingTextParser,
103107
)
104108
from nomad_simulation_parsers.parsers.vasp.outcar_parser import (
105-
OutcarParser,
106-
OutcarTextParser,
109+
potcar_quantities,
110+
)
111+
112+
outcar_supplement_parser = TextParser(
113+
quantities=[
114+
Quantity(
115+
'pseudopotentials',
116+
r'POTCAR:([\s\S]+?VRHFIN[\s\S]+?)(?=\s*POTCAR:|\s*local pseudopotential:|\Z)',
117+
repeats=True,
118+
sub_parser=TextParser(quantities=potcar_quantities),
119+
)
120+
]
107121
)
108122

109-
outcar_parser = OutcarParser()
110-
outcar_parser.text_parser = OutcarTextParser()
111-
outcar_parser.filepath = outcar_path
123+
outcar_parser = MappingTextParser(filepath=outcar_path)
124+
outcar_parser.text_parser = outcar_supplement_parser
112125

113126
data_parser.annotation_key = vasp.OUTCAR_KEY
114127
# Merge by index position: OUTCAR PP[0] extends XML PP[0], etc.
@@ -124,13 +137,13 @@ def write_to_archive(self) -> None:
124137
xml_parser.close()
125138

126139
def _find_outcar(self) -> str | None:
127-
"""Find OUTCAR file in the same directory as vasprun.xml."""
128-
mainfile_dir = PathLib(self.mainfile).parent
140+
"""Find OUTCAR file in the same directory as vasprun.xml.
129141
130-
# Check for any file starting with 'outcar' (case-insensitive)
131-
# Catches: OUTCAR, outcar, OUTCAR.gz, outcar.bz2, etc.
132-
for file in mainfile_dir.iterdir():
133-
if file.name.lower().startswith('outcar'):
134-
return str(file)
135-
136-
return None
142+
Matches any file starting with 'outcar' (case-insensitive):
143+
OUTCAR, outcar, OUTCAR.gz, outcar.bz2, etc.
144+
"""
145+
mainfile_dir = PathLib(self.mainfile).parent
146+
return next(
147+
(str(f) for f in mainfile_dir.iterdir() if f.name.lower().startswith('outcar')),
148+
None
149+
)

0 commit comments

Comments
 (0)