Apply ruff formatting and fix linting errors

ndaelman-hu · ndaelman-hu · commit 002a4d533cc0 · 2026-02-04T11:32:02.000+01:00
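Fix line-length violations (E501) and magic-value warnings (PLR2004) by:
- Breaking long docstrings, comments, and expressions across multiple lines
- Extracting array indices to named variables with explanatory comments

Also apply the ruff formatter's output: sort imports, normalize string
quotes to single quotes, and re-wrap call arguments and dict literals to
fit the line limit. No functional changes are intended.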
diff --git a/src/nomad_simulation_parsers/parsers/vasp/outcar_parser.py b/src/nomad_simulation_parsers/parsers/vasp/outcar_parser.py
@@ -328,21 +328,42 @@ def str_to_eigenvalues(val_in):
                             ('TITEL', re.search(r'TITEL\s*=\s*(.+)', val_in)),
                             ('VRHFIN', re.search(r'VRHFIN\s*=(.+?)(?:\n|$)', val_in)),
                             ('LEXCH', re.search(r'LEXCH\s*=\s*(\w+)', val_in)),
-                            ('ZVAL', re.search(r'POMASS\s*=\s*[\d\.]+;\s*ZVAL\s*=\s*([\d\.]+)', val_in)),
+                            (
+                                'ZVAL',
+                                re.search(
+                                    r'POMASS\s*=\s*[\d\.]+;\s*ZVAL\s*=\s*([\d\.]+)',
+                                    val_in,
+                                ),
+                            ),
                             ('RCORE', re.search(r'RCORE\s*=\s*([\d\.]+)', val_in)),
                             ('ENMAX', re.search(r'ENMAX\s*=\s*([\d\.]+)', val_in)),
                             ('ENMIN', re.search(r'ENMIN\s*=\s*([\d\.]+)', val_in)),
                             ('LPAW', re.search(r'LPAW\s*=\s*([TF])', val_in)),
                             ('LULTRA', re.search(r'LULTRA\s*=\s*([TF])', val_in)),
-                            ('LMAX', re.search(r'number of l-projection\s+operators is LMAX\s*=\s*(\d+)', val_in)),
-                            ('LMMAX', re.search(r'number of lm-projection\s+operators is LMMAX\s*=\s*(\d+)', val_in)),
+                            (
+                                'LMAX',
+                                re.search(
+                                    r'number of l-projection\s+operators is '
+                                    r'LMAX\s*=\s*(\d+)',
+                                    val_in,
+                                ),
+                            ),
+                            (
+                                'LMMAX',
+                                re.search(
+                                    r'number of lm-projection\s+operators is '
+                                    r'LMMAX\s*=\s*(\d+)',
+                                    val_in,
+                                ),
+                            ),
                             ('SHA256', re.search(r'SHA256\s*=\s*(\w+)', val_in)),
                         ]
                         if value is not None
                         for key, value in [(key, value.group(1) if value else None)]
                         if value is not None
                     }
-                    if 'VRHFIN' in val_in else {}  # Only process detailed sections with VRHFIN
+                    if 'VRHFIN' in val_in
+                    else {}  # Only process detailed sections with VRHFIN
                 ),
                 convert=False,
             ),
@@ -494,18 +515,23 @@ def get_xc_functionals(self, parameters: dict[str, Any]) -> list[dict[str, Any]]
                 xc_functionals.append({'name': functional})
         return xc_functionals
 
-    def get_pseudopotentials(self, source: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    def get_pseudopotentials(
+        self, source: list[dict[str, Any]]
+    ) -> list[dict[str, Any]]:
         """
-        Idiomatic transformer: Extract and derive all pseudopotential metadata from OUTCAR.
+        Idiomatic transformer: Extract and derive all pseudopotential metadata
+        from OUTCAR.
 
-        Performs ALL derivations here (type, XC functional, cutoffs) - no post-processing needed.
-        Returns plain dicts that the mapping framework converts to Pseudopotential instances.
+        Performs ALL derivations here (type, XC functional, cutoffs) - no
+        post-processing needed. Returns plain dicts that the mapping framework
+        converts to Pseudopotential instances.
 
         Args:
             source: List of raw POTCAR header dicts from OUTCAR parser
 
         Returns:
-            list[dict]: List of complete pseudopotential dicts ready for schema population
+            list[dict]: List of complete pseudopotential dicts ready for schema
+                population
         """
         pseudopotentials = []
 
@@ -517,7 +543,9 @@ def get_pseudopotentials(self, source: list[dict[str, Any]]) -> list[dict[str, A
             # Extract basic metadata
             pp_data = {
                 'name': raw_pp.get('TITEL'),
-                'n_valence_electrons': float(raw_pp['ZVAL']) if 'ZVAL' in raw_pp else None,
+                'n_valence_electrons': float(raw_pp['ZVAL'])
+                if 'ZVAL' in raw_pp
+                else None,
                 'reference_configuration': raw_pp.get('VRHFIN'),
                 'r_core': float(raw_pp['RCORE']) if 'RCORE' in raw_pp else None,
                 'l_max': int(raw_pp['LMAX']) if 'LMAX' in raw_pp else None,
@@ -527,10 +555,15 @@ def get_pseudopotentials(self, source: list[dict[str, Any]]) -> list[dict[str, A
 
             # Derive type from LPAW/LULTRA flags and name patterns (idiomatic)
             pp_data['type'] = self._derive_pp_type(raw_pp)
-            pp_data['is_norm_conserving'] = pp_data['type'] in ['NC', 'NC-PAW', 'NC-PAW-GW']
+            pp_data['is_norm_conserving'] = pp_data['type'] in [
+                'NC',
+                'NC-PAW',
+                'NC-PAW-GW',
+            ]
             pp_data['is_gw_optimized'] = '_GW' in (raw_pp.get('TITEL') or '')
 
-            # Derive XC functional dict (will be instantiated as XCFunctional subsection)
+            # Derive XC functional dict (will be instantiated as XCFunctional
+            # subsection)
             xc_key = self._derive_pp_xc_functional_key(raw_pp)
             if xc_key:
                 pp_data['xc_functional'] = {'functional_key': xc_key}
@@ -567,7 +600,8 @@ def _derive_pp_type(self, raw_pp: dict[str, Any]) -> str | None:
     def _derive_pp_xc_functional_key(self, raw_pp: dict[str, Any]) -> str | None:
         """Derive XC functional_key from LEXCH parameter using VASP mapping.
 
-        Returns standard functional aliases that will be expanded to LibXC components by normalization.
+        Returns standard functional aliases that will be expanded to LibXC
+        components by normalization.
         """
         lexch = raw_pp.get('LEXCH')
         if not lexch:
@@ -576,11 +610,11 @@ def _derive_pp_xc_functional_key(self, raw_pp: dict[str, Any]) -> str | None:
         # Map VASP LEXCH codes to standard functional aliases
         # These aliases are expanded to LibXC labels during XCFunctional.normalize()
         lexch_mapping = {
-            'PE': 'PBE',          # Perdew-Burke-Ernzerhof -> XC_GGA_X_PBE + XC_GGA_C_PBE
-            'CA': 'CA',           # Ceperley-Alder (Teter parametrization)
-            'PW': 'PW91',         # Perdew-Wang 91
-            'HL': 'HL',           # Hedin-Lundqvist
-            'WI': 'WI',           # Wigner interpolation
+            'PE': 'PBE',  # Perdew-Burke-Ernzerhof -> XC_GGA_X_PBE + XC_GGA_C_PBE
+            'CA': 'CA',  # Ceperley-Alder (Teter parametrization)
+            'PW': 'PW91',  # Perdew-Wang 91
+            'HL': 'HL',  # Hedin-Lundqvist
+            'WI': 'WI',  # Wigner interpolation
         }
 
         return lexch_mapping.get(lexch)
@@ -591,19 +625,23 @@ def _derive_pp_cutoffs(self, raw_pp: dict[str, Any]) -> list[dict[str, Any]]:
 
         enmax = raw_pp.get('ENMAX')
         if enmax is not None:
-            cutoffs.append({
-                'cutoff_kind': 'wavefunction',
-                'cutoff_role': 'recommended',
-                'value': float(enmax),
-            })
+            cutoffs.append(
+                {
+                    'cutoff_kind': 'wavefunction',
+                    'cutoff_role': 'recommended',
+                    'value': float(enmax),
+                }
+            )
 
         enmin = raw_pp.get('ENMIN')
         if enmin is not None:
-            cutoffs.append({
-                'cutoff_kind': 'wavefunction',
-                'cutoff_role': 'recommended_min',
-                'value': float(enmin),
-            })
+            cutoffs.append(
+                {
+                    'cutoff_kind': 'wavefunction',
+                    'cutoff_role': 'recommended_min',
+                    'value': float(enmin),
+                }
+            )
 
         return cutoffs
 
diff --git a/src/nomad_simulation_parsers/parsers/vasp/xml_parser.py b/src/nomad_simulation_parsers/parsers/vasp/xml_parser.py
@@ -1,6 +1,6 @@
-from typing import TYPE_CHECKING, Any
 import os
 from pathlib import Path as PathLib
+from typing import TYPE_CHECKING, Any
 
 import numpy as np
 
@@ -71,27 +71,36 @@ def reshape_array(self, source: np.ndarray, shape_rest: tuple = (3,)) -> np.ndar
             source, (np.size(source) // int(np.prod(shape_rest)), *shape_rest)
         )
 
-    def get_pseudopotentials_xml(self, arrays: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    def get_pseudopotentials_xml(
+        self, arrays: list[dict[str, Any]]
+    ) -> list[dict[str, Any]]:
         """
-        Idiomatic transformer: Extract limited pseudopotential metadata from vasprun.xml.
+        Idiomatic transformer: Extract limited pseudopotential metadata from
+        vasprun.xml.
 
-        Note: vasprun.xml contains very limited pseudopotential information compared to OUTCAR.
-        Only name (TITEL) and valence electrons (ZVAL) are available. Other fields like type,
-        XC functional, and cutoffs must be supplemented from OUTCAR via multi-pass parsing.
+        Note: vasprun.xml contains very limited pseudopotential information
+        compared to OUTCAR. Only name (TITEL) and valence electrons (ZVAL) are
+        available. Other fields like type, XC functional, and cutoffs must be
+        supplemented from OUTCAR via multi-pass parsing.
 
         Args:
-            arrays: List of arrays from atominfo, need to filter for @name='atomtypes'
+            arrays: List of arrays from atominfo, need to filter for
+                @name='atomtypes'
 
         Returns:
-            list[dict]: List of pseudopotential dicts with limited metadata (name and n_valence_electrons only)
+            list[dict]: List of pseudopotential dicts with limited metadata
+                (name and n_valence_electrons only)
         """
         if not arrays:
             return []
 
         # Find the atomtypes array
         atomtypes_array = None
         for arr in arrays:
-            if isinstance(arr, dict) and arr.get(f'{self.attribute_prefix}name') == 'atomtypes':
+            if (
+                isinstance(arr, dict)
+                and arr.get(f'{self.attribute_prefix}name') == 'atomtypes'
+            ):
                 atomtypes_array = arr
                 break
 
@@ -101,7 +110,8 @@ def get_pseudopotentials_xml(self, arrays: list[dict[str, Any]]) -> list[dict[st
         pseudopotentials = []
 
         # Extract atomtypes data - vasprun.xml stores as array of rc/c elements
-        # Structure: array[@name='atomtypes'] -> set -> rc with c elements for atomspertype, element, pseudopotential, valence
+        # Structure: array[@name='atomtypes'] -> set -> rc with c elements for
+        # atomspertype, element, pseudopotential, valence
         for atomtype_set in [atomtypes_array]:
             if not isinstance(atomtype_set, dict):
                 continue
@@ -115,14 +125,22 @@ def get_pseudopotentials_xml(self, arrays: list[dict[str, Any]]) -> list[dict[st
                 if not isinstance(rc, dict):
                     continue
 
-                # Each rc has 'c' elements: c[0]=atomspertype, c[1]=element, c[2]=mass, c[3]=valence, c[4]=pseudopotential
+                # Each rc has 'c' elements: c[0]=atomspertype, c[1]=element,
+                # c[2]=mass, c[3]=valence, c[4]=pseudopotential
                 c_elements = rc.get('c', [])
-                if not isinstance(c_elements, list) or len(c_elements) < 5:
+                min_elements = 5  # Need indices 0-4 for all pseudopotential data
+                if not isinstance(c_elements, list) or len(c_elements) < min_elements:
                     continue
 
                 # Extract name and valence electrons from c elements
-                pp_name = c_elements[4] if len(c_elements) > 4 else None  # pseudopotential name
-                valence_str = c_elements[3] if len(c_elements) > 3 else None  # valence
+                idx_pp_name = 4  # Index for pseudopotential name
+                idx_valence = 3  # Index for valence electrons
+                pp_name = (
+                    c_elements[idx_pp_name] if len(c_elements) > idx_pp_name else None
+                )
+                valence_str = (
+                    c_elements[idx_valence] if len(c_elements) > idx_valence else None
+                )
 
                 # Parse valence electrons
                 n_valence = None
@@ -159,13 +177,18 @@ def write_to_archive(self) -> None:
         xml_parser.convert(data_parser)
 
         # Third pass: OUTCAR_KEY to extend with OUTCAR data if available
-        # This allows OUTCAR to supplement vasprun.xml pseudopotentials with detailed metadata
+        # This allows OUTCAR to supplement vasprun.xml pseudopotentials with
+        # detailed metadata
         outcar_path = self._find_outcar()
         if outcar_path and os.path.exists(outcar_path):
             LOGGER.info(
-                f"Found OUTCAR at {outcar_path}, extending vasprun.xml data with detailed pseudopotential metadata"
+                f'Found OUTCAR at {outcar_path}, extending vasprun.xml data '
+                'with detailed pseudopotential metadata'
+            )
+            from nomad_simulation_parsers.parsers.vasp.outcar_parser import (
+                OutcarParser,
+                OutcarTextParser,
             )
-            from nomad_simulation_parsers.parsers.vasp.outcar_parser import OutcarParser, OutcarTextParser
 
             outcar_parser = OutcarParser()
             outcar_parser.text_parser = OutcarTextParser()
diff --git a/src/nomad_simulation_parsers/schema_packages/vasp.py b/src/nomad_simulation_parsers/schema_packages/vasp.py
@@ -37,7 +37,9 @@ class Simulation(general.Simulation):
         'modeling.parameters.separator[?"@name"==\'electronic\']',
     )
     add_mapping_annotation(model_method.DFT.m_def, OUTCAR_KEY, 'parameters')
-    add_mapping_annotation(general.Simulation.model_system, XML_KEY, 'modeling.calculation')
+    add_mapping_annotation(
+        general.Simulation.model_system, XML_KEY, 'modeling.calculation'
+    )
     add_mapping_annotation(general.Simulation.model_system, OUTCAR_KEY, '.calculation')
     add_mapping_annotation(general.Simulation.outputs, XML_KEY, 'modeling.calculation')
     add_mapping_annotation(general.Simulation.outputs, XML2_KEY, 'modeling.calculation')
@@ -289,31 +291,31 @@ class ElectronicEigenvalues(outputs.ElectronicEigenvalues):
 
 class Pseudopotential(numerical_settings.Pseudopotential):
     """
-    VASP-specific pseudopotential metadata extracted from POTCAR headers in OUTCAR and vasprun.xml.
+    VASP-specific pseudopotential metadata extracted from POTCAR headers in
+    OUTCAR and vasprun.xml.
 
-    Extends base Pseudopotential class with SHA256 hash for POTCAR file identification.
-    All field derivations (type, XC functional, cutoffs) are performed idiomatically in parser
-    transformers, not in post-processing.
+    Extends base Pseudopotential class with SHA256 hash for POTCAR file
+    identification. All field derivations (type, XC functional, cutoffs) are
+    performed idiomatically in parser transformers, not in post-processing.
     """
 
-    from nomad.metainfo import Quantity
     import numpy as np
+    from nomad.metainfo import Quantity
 
     sha256 = Quantity(
         type=str,
         description="""
-        SHA256 hash of the POTCAR file content. Uniquely identifies the pseudopotential
-        file and enables verification that the correct POTCAR was used. This hash can be
-        matched against pseudopotential library databases for automatic library detection.
+        SHA256 hash of the POTCAR file content. Uniquely identifies the
+        pseudopotential file and enables verification that the correct POTCAR
+        was used. This hash can be matched against pseudopotential library
+        databases for automatic library detection.
         """,
     )
 
     # Field annotations: map dict keys from transformer to schema fields
     # All derivations (type, xc_functional, cutoffs) done in transformer
     add_mapping_annotation(numerical_settings.Pseudopotential.name, OUTCAR_KEY, '.name')
-    add_mapping_annotation(
-        numerical_settings.Pseudopotential.type, OUTCAR_KEY, '.type'
-    )
+    add_mapping_annotation(numerical_settings.Pseudopotential.type, OUTCAR_KEY, '.type')
     add_mapping_annotation(
         numerical_settings.Pseudopotential.n_valence_electrons,
         OUTCAR_KEY,
@@ -335,7 +337,10 @@ class Pseudopotential(numerical_settings.Pseudopotential):
         '.is_gw_optimized',
     )
     add_mapping_annotation(
-        numerical_settings.Pseudopotential.r_core, OUTCAR_KEY, '.r_core', unit='angstrom'
+        numerical_settings.Pseudopotential.r_core,
+        OUTCAR_KEY,
+        '.r_core',
+        unit='angstrom',
     )
     add_mapping_annotation(
         numerical_settings.Pseudopotential.l_max, OUTCAR_KEY, '.l_max'
@@ -354,19 +359,29 @@ class Pseudopotential(numerical_settings.Pseudopotential):
     # Map XC functional dict fields to XCFunctional schema fields
     # The functional_key will be expanded to components during normalization
     from nomad_simulations.schema_packages import model_method
-    add_mapping_annotation(model_method.XCFunctional.functional_key, OUTCAR_KEY, '.functional_key')
+
+    add_mapping_annotation(
+        model_method.XCFunctional.functional_key, OUTCAR_KEY, '.functional_key'
+    )
 
     # Cutoffs: map list of dicts to repeating subsection
-    # Transformer returns list like [{'cutoff_kind': 'wavefunction', 'cutoff_role': 'recommended', 'value': 172.237}, ...]
+    # Transformer returns list like [{'cutoff_kind': 'wavefunction',
+    # 'cutoff_role': 'recommended', 'value': 172.237}, ...]
     # The framework will create PPCutoff instances for each dict in the list
     add_mapping_annotation(
         numerical_settings.Pseudopotential.cutoffs, OUTCAR_KEY, '.cutoffs'
     )
 
     # Map cutoff dict fields to PPCutoff schema fields
-    add_mapping_annotation(numerical_settings.PPCutoff.cutoff_kind, OUTCAR_KEY, '.cutoff_kind')
-    add_mapping_annotation(numerical_settings.PPCutoff.cutoff_role, OUTCAR_KEY, '.cutoff_role')
-    add_mapping_annotation(numerical_settings.PPCutoff.value, OUTCAR_KEY, '.value', unit='eV')
+    add_mapping_annotation(
+        numerical_settings.PPCutoff.cutoff_kind, OUTCAR_KEY, '.cutoff_kind'
+    )
+    add_mapping_annotation(
+        numerical_settings.PPCutoff.cutoff_role, OUTCAR_KEY, '.cutoff_role'
+    )
+    add_mapping_annotation(
+        numerical_settings.PPCutoff.value, OUTCAR_KEY, '.value', unit='eV'
+    )
 
     add_mapping_annotation(sha256, OUTCAR_KEY, '.sha256')