Skip to content

Commit 851e7aa

Browse files
authored
Merge pull request #3 from FAIRmat-NFDI/vasp-mapping-parser
Vasp mapping parser
2 parents 4b60d57 + d384524 commit 851e7aa

File tree

12 files changed

+1081
-120
lines changed

12 files changed

+1081
-120
lines changed

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,8 @@ where = ["src"]
122122
[project.entry-points.'nomad.plugin']
123123
exciting_parser_entry_point = "nomad_simulation_parsers.parsers:exciting_parser_entry_point"
124124
exciting_schema_package_entry_point = "nomad_simulation_parsers.schema_packages:exciting_schema_package_entry_point"
125+
vasp_parser_entry_point = "nomad_simulation_parsers.parsers:vasp_parser_entry_point"
126+
vasp_schema_package_entry_point = "nomad_simulation_parsers.schema_packages:vasp_schema_package_entry_point"
125127

126128

127129

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,20 @@
11
from nomad.config.models.plugins import ParserEntryPoint
2+
from pydantic import Field
23

34

45
class EntryPoint(ParserEntryPoint):
6+
parser_class_name: str = Field(
7+
description="""
8+
The fully qualified name of the Python class that implements the parser.
9+
This class must have a function `def parse(self, mainfile, archive, logger)`.
10+
"""
11+
)
12+
513
def load(self):
614
from nomad.parsing.parser import MatchingParserInterface
715

816
return MatchingParserInterface(
9-
parser_class_name='nomad_simulation_parsers.parsers.exciting.parser.ExcitingParser',
17+
self.parser_class_name,
1018
**self.dict(),
1119
)
1220

@@ -15,9 +23,27 @@ def load(self):
1523
name='parsers/exciting',
1624
aliases=['parsers/exciting'],
1725
description='NOMAD parser for EXCITING.',
26+
parser_class_name='nomad_simulation_parsers.parsers.exciting.parser.ExcitingParser',
1827
python_package='nomad_simulation_parsers',
1928
mainfile_contents_re=r'EXCITING.*started[\s\S]+?All units are atomic ',
2029
mainfile_name_re=r'^.*.OUT(\.[^/]*)?$',
2130
code_name='exciting',
2231
code_homepage='http://exciting-code.org/',
2332
)
33+
34+
vasp_parser_entry_point = EntryPoint(
35+
name='parsers/vasp',
36+
description='Parser for VASP XML and OUTCAR outputs',
37+
parser_class_name='nomad_simulation_parsers.parsers.vasp.parser.VASPParser',
38+
python_package='nomad_simulation_parsers',
39+
code_name='VASP',
40+
mainfile_contents_re=(
41+
r'^\s*<\?xml version="1\.0" encoding="ISO-8859-1"\?>\s*?\s*<modeling>?\s*'
42+
r'<generator>?\s*<i name="program" type="string">\s*vasp\s*</i>?|'
43+
r'^\svasp[\.\d]+.+?(?:\(build|complex)[\s\S]+?executed on'
44+
),
45+
mainfile_mime_re='(application/.*)|(text/.*)',
46+
mainfile_name_re='.*[^/]*xml[^/]*',
47+
mainfile_alternative=True,
48+
supported_compressions=['gz', 'bz2', 'xz'],
49+
)

src/nomad_simulation_parsers/parsers/exciting/parser.py

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,7 @@
2020
from nomad.units import ureg
2121
from nomad_simulations.schema_packages.general import Simulation
2222

23-
import nomad_simulation_parsers.schema_packages.exciting # noqa
24-
from nomad_simulation_parsers.parsers.utils import search_files
23+
from nomad_simulation_parsers.parsers.utils.general import search_files, remove_mapping_annotations
2524

2625
from .eigval_reader import EigvalReader
2726
from .info_reader import InfoReader
@@ -143,6 +142,8 @@ class ExcitingParser(Parser):
143142
def parse(
144143
self, mainfile: str, archive: 'EntryArchive', logger: 'BoundLogger'
145144
) -> None:
145+
from nomad_simulation_parsers.schema_packages import exciting
146+
146147
maindir = os.path.dirname(mainfile)
147148
mainbase = os.path.basename(mainfile)
148149

@@ -165,6 +166,7 @@ def parse(
165166
input_xml_parser = InputXMLParser(filepath=input_xml_files[0])
166167
data_parser.annotation_key = 'input_xml'
167168
input_xml_parser.convert(data_parser)
169+
input_xml_parser.close()
168170

169171
# eigenvalues from eigval.out
170172
eigval_files = search_files('EIGVAL.OUT', maindir, mainbase)
@@ -174,7 +176,7 @@ def parse(
174176
)
175177
data_parser.annotation_key = 'eigval'
176178
eigval_parser.convert(data_parser, update_mode='merge@-1')
177-
self.eigval_parser = eigval_parser
179+
eigval_parser.close()
178180

179181
# bandstructure from bandstructure.xml
180182
bandstructure_files = search_files('bandstructure.xml', maindir, mainbase)
@@ -185,21 +187,21 @@ def parse(
185187
# TODO set n_spin from info
186188
data_parser.annotation_key = 'bandstructure_xml'
187189
bandstructure_parser.convert(data_parser, update_mode='merge@-1')
188-
self.bandstructure_parser = bandstructure_parser
190+
bandstructure_parser.close()
189191

190192
# dos from dos.xml
191193
dos_files = search_files('dos.xml', maindir, mainbase)
192194
if dos_files:
193195
dos_parser = DosXMLParser(filepath=dos_files[0])
194196
data_parser.annotation_key = 'dos_xml'
195197
dos_parser.convert(data_parser, update_mode='merge@-1')
196-
self.dos_parser = dos_parser
198+
dos_parser.close()
197199

198200
archive.data = data_parser.data_object
199201

200-
self.info_parser = info_parser
201202
# close parsers
202-
# info_parser.close()
203-
# input_xml_parser.close()
204-
# eigval_parser.close()
205-
# data_parser.close()
203+
info_parser.close()
204+
data_parser.close()
205+
206+
# remove annotations
207+
remove_mapping_annotations(exciting.general.Simulation.m_def)

src/nomad_simulation_parsers/parsers/parser.py

Lines changed: 0 additions & 32 deletions
This file was deleted.

src/nomad_simulation_parsers/parsers/utils.py

Lines changed: 0 additions & 39 deletions
This file was deleted.
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
import os
2+
import re
3+
from glob import glob
4+
from typing import Union
5+
6+
from nomad.metainfo import Section, SubSection
7+
8+
9+
def search_files(
    pattern: str,
    basedir: str,
    deep: bool = True,
    max_dirs: int = 10,
    re_pattern: str = '',
) -> list[str]:
    """Search for files matching the glob `pattern`, starting from `basedir`.

    The search proceeds one directory level at a time: the pattern is first tried
    directly in `basedir`; if nothing matches, it is prefixed with `**` (deep=True,
    one level further down) or `..` (deep=False, one level further up) and tried
    again, for at most `max_dirs` attempts. The search stops at the first level
    that yields any match. If more than one file is found, a further regex search
    with `re_pattern` is done to narrow down the matching files; when no file
    satisfies the regex the unfiltered list is kept.

    Args:
        pattern (str): glob pattern to match the files in the folder
        basedir (str): directory to start the search
        deep (bool, optional): folders search direction (True=down, False=up)
        max_dirs (int, optional): maximum number of directory levels to try,
            counting `basedir` itself. A value <= 0 performs no search.
        re_pattern (str, optional): additional regex pattern to filter matching files

    Returns:
        list: list of matching files (empty if nothing was found)
    """
    # Start from an empty result so that `max_dirs <= 0` yields [] instead of
    # failing on an unassigned variable after the loop.
    filenames: list[str] = []
    # `glob` is called without recursive=True, so each `**` segment behaves like
    # `*` and stands for exactly one directory level.
    step = '**' if deep else '..'

    for _ in range(max_dirs):
        filenames = glob(f'{basedir}/{pattern}')
        if filenames:
            break
        pattern = os.path.join(step, pattern)

    if len(filenames) > 1:
        # Prefer the files that also satisfy the regex, but never filter down
        # to nothing.
        matches = [f for f in filenames if re.search(re_pattern, f)]
        if matches:
            filenames = matches

    # Keep only paths that exist at this moment.
    return [f for f in filenames if os.access(f, os.F_OK)]
43+
44+
45+
def remove_mapping_annotations(property: Section, max_depth: int = 5) -> None:
    """
    Strip the 'mapping' annotation from a section definition, from all of its
    quantities, and recursively from its annotated sub-sections.

    A sub-section is only descended into when the sub-section definition itself,
    the section it points to, or one of that section's inheriting sections still
    carries a 'mapping' annotation.

    Args:
        property (Section): The section definition to remove the annotations from.
        max_depth (int, optional): The maximum recursion depth, which bounds the
            descent when sub-sections refer back to a parent section.
    """
    mapping_key = 'mapping'

    def _strip(definition: Union[Section, SubSection], level: int = 0):
        # Stop once the recursion limit is exceeded.
        if level > max_depth:
            return

        definition.m_annotations.pop(mapping_key, None)

        # A SubSection wraps the section definition that holds the quantities
        # and nested sub-sections; a Section is used directly.
        if isinstance(definition, SubSection):
            section_def = definition.sub_section
        else:
            section_def = definition

        for quantity_def in section_def.all_quantities.values():
            quantity_def.m_annotations.pop(mapping_key, None)

        next_level = level + 1
        for child in section_def.all_sub_sections.values():
            # Annotation sits on the sub-section definition itself.
            if child.m_annotations.get(mapping_key):
                _strip(child, next_level)
                continue

            # Annotation sits on the section the sub-section points to.
            target = child.sub_section
            if target.m_annotations.get(mapping_key):
                _strip(target, next_level)
                continue

            # Otherwise look for annotated sections inheriting from the target.
            for derived in target.all_inheriting_sections:
                if derived.m_annotations.get(mapping_key):
                    _strip(derived, next_level)

    _strip(property)

0 commit comments

Comments
 (0)