diff --git a/src/pymatgen/io/vasp/outputs.py b/src/pymatgen/io/vasp/outputs.py index a79e5bd4ce4..c521062b2eb 100644 --- a/src/pymatgen/io/vasp/outputs.py +++ b/src/pymatgen/io/vasp/outputs.py @@ -2,23 +2,22 @@ from __future__ import annotations -import hashlib import itertools import math import os import re import warnings +import xml.etree.ElementTree as ET from collections import defaultdict from collections.abc import Iterable from dataclasses import dataclass +from datetime import datetime, timezone from glob import glob -from io import BytesIO +from io import StringIO from pathlib import Path -from typing import TYPE_CHECKING, Any, cast -from xml.etree import ElementTree as ET +from typing import TYPE_CHECKING, cast import numpy as np -from monty.dev import requires from monty.io import reverse_readfile, zopen from monty.json import MSONable, jsanitize from monty.os.path import zpath @@ -38,19 +37,14 @@ from pymatgen.entries.computed_entries import ComputedEntry, ComputedStructureEntry from pymatgen.io.common import VolumetricData as BaseVolumetricData from pymatgen.io.core import ParseError -from pymatgen.io.vasp.inputs import Incar, Kpoints, KpointsSupportedModes, Poscar, Potcar +from pymatgen.io.vasp.inputs import Incar, Kpoints, Poscar, Potcar from pymatgen.io.wannier90 import Unk from pymatgen.util.io_utils import clean_lines, micro_pyawk from pymatgen.util.num import make_symmetric_matrix_from_upper_tri -try: - import h5py -except ImportError: - h5py = None - if TYPE_CHECKING: - from collections.abc import Callable, Iterator, Sequence - from typing import Literal, TypeAlias + from collections.abc import Callable + from typing import Any, Literal # Avoid name conflict with pymatgen.core.Element from xml.etree.ElementTree import Element as XML_Element @@ -58,7 +52,7 @@ from numpy.typing import NDArray from typing_extensions import Self - from pymatgen.util.typing import Kpoint, PathLike + from pymatgen.util.typing import Kpoint, PathLike, Tuple3Floats, Vector3D def _parse_parameters(val_type: str, val: str) -> bool | str | float | int: @@ -133,15 +127,10 @@ def _parse_v_parameters( return floats -def _parse_vasp_array(elem) -> list[list[bool]] | NDArray[np.float64]: +def _parse_vasp_array(elem) -> list[list[float]]: if elem.get("type") == "logical": return [[i == "T" for i in v.text.split()] for v in elem] - - try: - # numerical data, try parse with numpy loadtxt for efficiency: - return np.loadtxt([e.text for e in elem], ndmin=2, dtype=np.float64) - except ValueError: # unexpectedly couldn't re-shape to grid - return np.array([list(map(_vasprun_float, e.text.split())) for e in elem]) + return [[_vasprun_float(i) for i in v.text.split()] for v in elem] def _parse_from_incar(filename: PathLike, key: str) -> Any: @@ -164,7 +153,9 @@ def _vasprun_float(flt: float | str) -> float: return float(flt) except ValueError: - if "*" in str(flt): + flt = cast("str", flt) + _flt: str = flt.strip() + if _flt == "*" * len(_flt): warnings.warn( "Float overflow (*******) encountered in vasprun", stacklevel=2, @@ -183,24 +174,13 @@ class KpointOptProps: efermi: float | None = None eigenvalues: dict | None = None projected_eigenvalues: dict | None = None - projected_magnetisation: NDArray | None = None + projected_magnetisation: np.ndarray | None = None kpoints: Kpoints | None = None actual_kpoints: list | None = None actual_kpoints_weights: list | None = None dos_has_errors: bool | None = None -@dataclass -class BandgapProps(MSONable): - vbm: float | None = None - cbm: float | None = None - direct_gap_eigenvalues: tuple[float, float] | None = None - efermi: float | None = None - vbm_k: tuple[float, float, float] | None = None - cbm_k: tuple[float, float, float] | None = None - direct_gap_k: tuple[float, float, float] | None = None - - class Vasprun(MSONable): """ Vastly improved cElementTree-based parser for vasprun.xml files. Uses @@ -224,7 +204,7 @@ class Vasprun(MSONable): To access a particular value, you need to do Vasprun.projected_eigenvalues[spin][kpoint index][band index][atom index][orbital_index]. The kpoint, band and atom indices are 0-based (unlike the 1-based indexing in VASP). - projected_magnetisation (NDArray): Final projected magnetization as a numpy array with the + projected_magnetisation (np.array): Final projected magnetization as a numpy array with the shape (nkpoints, nbands, natoms, norbitals, 3). Where the last axis is the contribution in the 3 Cartesian directions. This attribute is only set if spin-orbit coupling (LSORBIT = True) or non-collinear magnetism (LNONCOLLINEAR = True) is turned on in the INCAR. @@ -237,10 +217,10 @@ class Vasprun(MSONable): The data can be the current, density or freq_dependent (BSE) dielectric data. nionic_steps (int): The total number of ionic steps. This number is always equal to the total number of steps in the actual run even if ionic_step_skip is used. - force_constants (NDArray): Force constants computed in phonon DFPT run(IBRION = 8). - The data is a 4D array of shape (natoms, natoms, 3, 3). - normalmode_eigenvals (NDArray): Normal mode frequencies. 1D array of size 3*natoms. - normalmode_eigenvecs (NDArray): Normal mode eigen vectors. 3D array of shape (3*natoms, natoms, 3). + force_constants (np.array): Force constants computed in phonon DFPT run(IBRION = 8). + The data is a 4D numpy array of shape (natoms, natoms, 3, 3). + normalmode_eigenvals (np.array): Normal mode frequencies. 1D numpy array of size 3*natoms. + normalmode_eigenvecs (np.array): Normal mode eigen vectors. 3D numpy array of shape (3*natoms, natoms, 3). md_data (list): Available only for ML MD runs, i.e., INCAR with ML_LMLFF = .TRUE. md_data is a list of dict with the following format: [{'energy': {'e_0_energy': -525.07195568, 'e_fr_energy': -525.07195568, 'e_wo_entrp': -525.07195568, 'kinetic': 3.17809233, 'lattice kinetic': 0.0, 'nosekinetic': 1.323e-5, @@ -334,7 +314,7 @@ def __init__( with zopen(filename, mode="rt", encoding="utf-8") as file: if ionic_step_skip or ionic_step_offset: # Remove parts of the xml file and parse the string - content: str = file.read() # type:ignore[assignment] + content: str = file.read() steps: list[str] = content.split("") # The text before the first is the preamble! @@ -349,7 +329,7 @@ def __init__( else: to_parse = f"{preamble}{to_parse}" self._parse( - BytesIO(to_parse.encode("utf-8")), + StringIO(to_parse), parse_dos=parse_dos, parse_eigen=parse_eigen, parse_projected_eigen=parse_projected_eigen, @@ -389,14 +369,13 @@ def _parse( self.projected_eigenvalues: dict[Any, NDArray] | None = None self.projected_magnetisation: NDArray | None = None self.dielectric_data: dict[str, tuple] = {} - self.incar: Incar = Incar({}) + self.incar: Incar = {} self.kpoints_opt_props: KpointOptProps | None = None ionic_steps: list[dict[str, Any]] = [] md_data: list[dict] = [] parsed_header: bool = False in_kpoints_opt: bool = False - ml_run: bool = False try: # When parsing XML, start tags tell us when we have entered a block # while end tags are when we have actually read the data. @@ -425,7 +404,6 @@ def _parse( self.generator = self._parse_params(elem) elif tag == "incar": self.incar = self._parse_params(elem) - ml_run = self.incar.get("ML_LMLFF") elif tag == "kpoints": if not hasattr(self, "kpoints"): ( @@ -526,7 +504,7 @@ def _parse( self.dielectric_data[label] = self._parse_diel(elem) elif tag == "varray" and elem.attrib.get("name") == "opticaltransitions": - self.optical_transition = _parse_vasp_array(elem) + self.optical_transition = np.array(_parse_vasp_array(elem)) elif tag == "structure" and elem.attrib.get("name") == "finalpos": self.final_structure = self._parse_structure(elem) @@ -536,7 +514,7 @@ def _parse( # n_atoms is not the total number of atoms, only those for which force constants were calculated # https://github.com/materialsproject/pymatgen/issues/3084 n_atoms = len(hessian) // 3 - hessian = np.array(hessian) # type:ignore[assignment] + hessian = np.array(hessian) self.force_constants = np.zeros((n_atoms, n_atoms, 3, 3), dtype="double") for ii in range(n_atoms): for jj in range(n_atoms): @@ -547,7 +525,7 @@ def _parse( self.normalmode_eigenvals = np.array(eigenvalues) self.normalmode_eigenvecs = np.array(phonon_eigenvectors) - elif ml_run: + elif self.incar.get("ML_LMLFF"): if tag == "structure" and elem.attrib.get("name") is None: md_data.append({}) md_data[-1]["structure"] = self._parse_structure(elem) @@ -804,13 +782,13 @@ def run_type(self) -> str: 4: "dDsC", } - if math.isclose(self.parameters.get("AEXX", 1.00), 1.00): + if self.parameters.get("AEXX", 1.00) == 1.00: run_type = "HF" - elif math.isclose(self.parameters.get("HFSCREEN", 0.30), 0.30): + elif self.parameters.get("HFSCREEN", 0.30) == 0.30: run_type = "HSE03" - elif math.isclose(self.parameters.get("HFSCREEN", 0.20), 0.20): + elif self.parameters.get("HFSCREEN", 0.20) == 0.20: run_type = "HSE06" - elif math.isclose(self.parameters.get("AEXX", 0.20), 0.20): + elif self.parameters.get("AEXX", 0.20) == 0.20: run_type = "B3LYP" elif self.parameters.get("LHFCALC", True): run_type = "PBEO or other Hybrid Functional" @@ -889,11 +867,7 @@ def get_computed_entry( ComputedStructureEntry/ComputedEntry """ if entry_id is None: - calc_date = re.sub(" ", "", self.generator["DATE"]) - calc_time = self.generator["TIME"] - hashed_structure = hashlib.md5(str(self.final_structure).encode("utf-8")).hexdigest() # noqa: S324 - - entry_id = f"vasprun-{calc_date}-{calc_time}-{hashed_structure}" + entry_id = f"vasprun-{datetime.now(tz=timezone.utc)}" param_names = { "is_hubbard", "hubbards", @@ -1053,7 +1027,7 @@ def get_band_structure( if (hybrid_band or force_hybrid_mode) and not use_kpoints_opt: start_bs_index = 0 for i in range(len(self.actual_kpoints)): - if math.isclose(self.actual_kpoints_weights[i], 0.0): + if self.actual_kpoints_weights[i] == 0.0: start_bs_index = i break for i in range(start_bs_index, len(kpoint_file.kpts)): @@ -1087,13 +1061,13 @@ def get_band_structure( labels_dict.pop(None, None) # type: ignore[call-overload] return BandStructureSymmLine( - np.array(kpoints), + kpoints, eigenvals, lattice_new, e_fermi, - labels_dict, + labels_dict, # type: ignore[arg-type] structure=self.final_structure, - projections=p_eig_vals, # type:ignore[arg-type] + projections=p_eig_vals, ) return BandStructure( @@ -1102,7 +1076,7 @@ def get_band_structure( lattice_new, e_fermi, structure=self.final_structure, - projections=p_eig_vals, # type:ignore[arg-type] + projections=p_eig_vals, ) @property @@ -1191,7 +1165,7 @@ def calculate_efermi(self, tol: float = 0.001) -> float: def crosses_band(fermi: float) -> bool: eigs_below = np.any(all_eigs < fermi, axis=1) eigs_above = np.any(all_eigs > fermi, axis=1) - return np.any(eigs_above & eigs_below) # type:ignore[return-value] + return np.any(eigs_above & eigs_below) def get_vbm_cbm(fermi): return np.max(all_eigs[all_eigs < fermi]), np.min(all_eigs[all_eigs > fermi]) @@ -1277,13 +1251,17 @@ def update_potcar_spec(self, path: PathLike | bool) -> None: Args: path (PathLike | bool): Path to search for POTCARs. - - Note that the vasprun.xml spec typically hasn't included the POTCAR symbols, - since these are derived from the TITEL. """ if potcar := self.get_potcars(path): self.potcar_spec = [ - ps.spec(extra_spec=[]) for sym in self.potcar_symbols for ps in potcar if ps.symbol == sym.split()[1] + { + "titel": sym, + "hash": ps.md5_header_hash, + "summary_stats": ps._summary_stats, + } + for sym in self.potcar_symbols + for ps in potcar + if ps.symbol == sym.split()[1] ] def update_charge_from_potcar(self, path: PathLike | bool) -> None: @@ -1434,7 +1412,7 @@ def as_dict(self) -> dict: dct["output"] = vout return jsanitize(dct, strict=True) - def _parse_params(self, elem: XML_Element) -> Incar: + def _parse_params(self, elem: XML_Element) -> Incar[str, Any]: """Parse INCAR parameters and more.""" params: dict[str, Any] = {} for c in elem: @@ -1446,7 +1424,7 @@ def _parse_params(self, elem: XML_Element) -> Incar: if name == "response functions": # Delete duplicate fields from "response functions", # which overrides the values in the root params. - p = {k: v for k, v in p.items() if k not in params} # type:ignore[assignment] + p = {k: v for k, v in p.items() if k not in params} params |= p else: @@ -1499,10 +1477,9 @@ def parse_atomic_symbol(symbol: str) -> str: @staticmethod def _parse_kpoints( elem: XML_Element, - ) -> tuple[Kpoints, list[tuple[float, float, float]], list[float]]: + ) -> tuple[Kpoints, list[Tuple3Floats], list[float]]: """Parse Kpoints.""" - gen = elem.find("generation") - e = elem if gen is None else gen + e = elem if elem.find("generation") is None else elem.find("generation") kpoint = Kpoints("Kpoints from vasprun.xml") kpoint.style = Kpoints.supported_modes.from_str(e.attrib.get("param", "Reciprocal")) # type: ignore[union-attr] @@ -1515,7 +1492,7 @@ def _parse_kpoints( cast("Kpoint", tuple(int(i) for i in tokens)), ] elif name == "usershift": - kpoint.kpts_shift = cast("tuple[float, float, float]", tuple(float(i) for i in tokens)) + kpoint.kpts_shift = cast("Vector3D", tuple(float(i) for i in tokens)) elif name in {"genvec1", "genvec2", "genvec3", "shift"}: setattr(kpoint, name, [float(i) for i in tokens]) @@ -1524,12 +1501,9 @@ def _parse_kpoints( for va in elem.findall("varray"): name = va.attrib["name"] if name == "kpointlist": - actual_kpoints = cast("list[tuple[float, float, float]]", list(map(tuple, _parse_vasp_array(va)))) # type:ignore[arg-type] + actual_kpoints = cast("list[Tuple3Floats]", list(map(tuple, _parse_vasp_array(va)))) elif name == "weights": - weights_array = _parse_vasp_array(va) - if isinstance(weights_array, np.ndarray): - weights_array = weights_array.flatten() - weights = list(weights_array) + weights = [i[0] for i in _parse_vasp_array(va)] elem.clear() if kpoint.style == Kpoints.supported_modes.Reciprocal: @@ -1540,7 +1514,7 @@ def _parse_kpoints( kpts=actual_kpoints, kpts_weights=weights, ) - return kpoint, actual_kpoints, weights # type:ignore[return-value] + return kpoint, actual_kpoints, weights def _parse_structure(self, elem: XML_Element) -> Structure: """Parse Structure with lattice, positions and selective dynamics info.""" @@ -1556,17 +1530,16 @@ def _parse_structure(self, elem: XML_Element) -> Structure: @staticmethod def _parse_diel(elem: XML_Element) -> tuple[list, list, list]: """Parse dielectric properties.""" - real_elem = elem.find("real") - imag_elem = elem.find("imag") - if real_elem is not None and imag_elem is not None: + if elem.find("real") is not None and elem.find("imag") is not None: imag = [ [_vasprun_float(line) for line in r.text.split()] # type: ignore[union-attr] - for r in imag_elem.find("array").find("set").findall("r") # type: ignore[union-attr] + for r in elem.find("imag").find("array").find("set").findall("r") # type: ignore[union-attr] ] real = [ [_vasprun_float(line) for line in r.text.split()] # type: ignore[union-attr] - for r in real_elem.find("array").find("set").findall("r") # type: ignore[union-attr] + for r in elem.find("real").find("array").find("set").findall("r") # type: ignore[union-attr] ] + elem.clear() return [e[0] for e in imag], [e[1:] for e in real], [e[1:] for e in imag] return [], [], [] @@ -1576,10 +1549,10 @@ def _parse_optical_transition(elem: XML_Element) -> tuple[NDArray, NDArray]: for va in elem.findall("varray"): if va.attrib.get("name") == "opticaltransitions": # optical transitions array contains oscillator strength and probability of transition - oscillator_strength = _parse_vasp_array(va)[:] - probability_transition = _parse_vasp_array(va)[:, 1] - elem.clear() - return oscillator_strength, probability_transition # type:ignore[return-value] + oscillator_strength = np.array(_parse_vasp_array(va))[:] + probability_transition = np.array(_parse_vasp_array(va))[:, 1] + + return oscillator_strength, probability_transition raise RuntimeError("Failed to parse optical transitions.") @@ -1592,7 +1565,6 @@ def _parse_chemical_shielding(self, elem: XML_Element) -> list[dict[str, Any]]: for va in elem.findall("varray"): istep[va.attrib["name"]] = _parse_vasp_array(va) - istep["structure"] = struct istep["electronic_steps"] = [] calculation = [ @@ -1614,7 +1586,6 @@ def _parse_chemical_shielding(self, elem: XML_Element) -> list[dict[str, Any]]: except AttributeError: # not all calculations have an energy pass calculation[-1].update(calculation[-1]["electronic_steps"][-1]) - elem.clear() return calculation def _parse_ionic_step(self, elem: XML_Element) -> dict[str, float]: @@ -1659,7 +1630,7 @@ def _parse_dos(elem: XML_Element) -> tuple[Dos, Dos, list[dict]]: soc_run = len(elem.find("total").find("array").find("set").findall("set")) > 2 # type: ignore[union-attr] for s in elem.find("total").find("array").find("set").findall("set"): # type: ignore[union-attr] - data = _parse_vasp_array(s) + data = np.array(_parse_vasp_array(s)) energies = data[:, 0] spin = Spin.up if s.attrib["comment"] == "spin 1" else Spin.down if spin != Spin.up and soc_run: # other 'spins' are x,y,z SOC projections @@ -1674,13 +1645,13 @@ def _parse_dos(elem: XML_Element) -> tuple[Dos, Dos, list[dict]]: orbs.pop(0) lm = any("x" in s for s in orbs if s is not None) for s in partial.find("array").find("set").findall("set"): # type: ignore[union-attr] - pdos: dict[Orbital | OrbitalType, dict[Spin, NDArray]] = defaultdict(dict) + pdos: dict[Orbital | OrbitalType, dict[Spin, np.ndarray]] = defaultdict(dict) for ss in s.findall("set"): spin = Spin.up if ss.attrib["comment"] == "spin 1" else Spin.down if spin != Spin.up and soc_run: # other 'spins' are x,y,z SOC projections continue - data = _parse_vasp_array(ss) + data = np.array(_parse_vasp_array(ss)) _n_row, n_col = data.shape for col_idx in range(1, n_col): orb = Orbital(col_idx - 1) if lm else OrbitalType(col_idx - 1) @@ -1699,7 +1670,7 @@ def _parse_dos(elem: XML_Element) -> tuple[Dos, Dos, list[dict]]: @staticmethod def _parse_eigen(elem: XML_Element) -> dict[Spin, NDArray]: """Parse eigenvalues.""" - eigenvalues: dict[Spin, NDArray] = defaultdict(list) # type:ignore[arg-type] + eigenvalues: dict[Spin, np.ndarray] = defaultdict(list) for s in elem.find("array").find("set").findall("set"): # type: ignore[union-attr] spin = Spin.up if s.attrib["comment"] == "spin 1" else Spin.down for ss in s.findall("set"): @@ -1714,7 +1685,7 @@ def _parse_projected_eigen( ) -> tuple[dict[Spin, NDArray], NDArray | None]: """Parse projected eigenvalues.""" root = elem.find("array").find("set") # type: ignore[union-attr] - _proj_eigen: dict[int, NDArray] = defaultdict(list) # type:ignore[arg-type] + _proj_eigen: dict[int, np.ndarray] = defaultdict(list) for s in root.findall("set"): # type: ignore[union-attr] spin: int = int(re.match(r"spin(\d+)", s.attrib["comment"])[1]) # type: ignore[index] @@ -1732,7 +1703,7 @@ def _parse_projected_eigen( # "spin channels" are the projected magnetization of the orbitals in the # x, y, and z Cartesian coordinates proj_mag = np.stack([_proj_eigen.pop(i) for i in range(2, 5)], axis=-1) # type: ignore[call-overload] - proj_eigen: dict[Spin, NDArray] = {Spin.up: _proj_eigen[1]} + proj_eigen: dict[Spin, np.ndarray] = {Spin.up: _proj_eigen[1]} else: proj_eigen = {Spin.up if k == 1 else Spin.down: v for k, v in _proj_eigen.items()} proj_mag = None @@ -1759,6 +1730,7 @@ def _parse_dynmat(elem: XML_Element) -> tuple[list, list, list]: elif va.attrib["name"] == "eigenvectors": for v in va.findall("v"): eigenvectors.append([float(i) for i in v.text.split()]) # type: ignore[union-attr] + return hessian, eigenvalues, eigenvectors @@ -1810,7 +1782,7 @@ def __init__( self.kpoints_opt_props = None for event, elem in ET.iterparse(file, events=["start", "end"]): tag = elem.tag - if event == "start" and not in_kpoints_opt: + if event == "start": # The start event tells us when we have entered blocks if tag in {"eigenvalues_kpoints_opt", "projected_kpoints_opt"} or ( tag == "dos" and elem.attrib.get("comment") == "kpoints_opt" @@ -1959,82 +1931,62 @@ def as_dict(self) -> dict: class Outcar: - """Parser for data in OUTCAR that is not available in vasprun.xml. + """Parser for data in OUTCAR that is not available in Vasprun.xml. Note, this class works a bit differently than most of the other - VASP parsers, since OUTCAR can be very different depending on which + VASP objects, since OUTCAR can be very different depending on which "type of run" performed. - Creating an Outcar instance with a filename reads "regular parameters" that - are always present. One can then call a specific reader method depending on the - type of run being performed, including (see the docstring of corresponding - method for more details): - - read_avg_core_poten - - read_chemical_shielding - - read_core_state_eigen - - read_corrections - - read_cs_core_contribution - - read_cs_g0_contribution - - read_cs_raw_symmetrized_tensors - - read_elastic_tensor - - read_electrostatic_potential - - read_fermi_contact_shift - - read_freq_dielectric - - read_igpar - - read_internal_strain_tensor - - read_lcalcpol - - read_lepsilon - - read_lepsilon_ionic - - read_neb - - read_nmr_efg - - read_nmr_efg_tensor - - read_onsite_density_matrices - - read_piezo_tensor - - read_pseudo_zval - - read_table_pattern + Create the OUTCAR class with a filename reads "regular parameters" that + are always present. Attributes: - magnetization (tuple[dict[str, float]]): Magnetization on each ion, e.g. - ({"d": 0.0, "p": 0.003, "s": 0.002, "tot": 0.005}, ... ). - chemical_shielding (dict): Chemical shielding on each ion with core and valence contributions. - unsym_cs_tensor (list): Unsymmetrized chemical shielding tensor matrixes on each ion. + magnetization (tuple): Magnetization on each ion as a tuple of dict, e.g. + ({"d": 0.0, "p": 0.003, "s": 0.002, "tot": 0.005}, ... ) + chemical_shielding (dict): Chemical shielding on each ion as a dictionary with core and valence contributions. + unsym_cs_tensor (list): Unsymmetrized chemical shielding tensor matrixes on each ion as a list. e.g. [[[sigma11, sigma12, sigma13], [sigma21, sigma22, sigma23], [sigma31, sigma32, sigma33]], ...] - cs_g0_contribution (NDArray): G=0 contribution to chemical shielding. 2D rank 3 matrix. - cs_core_contribution (dict[str, float]): Core contribution to chemical shielding. e.g. + cs_g0_contribution (np.array): G=0 contribution to chemical shielding. 2D rank 3 matrix. + cs_core_contribution (dict): Core contribution to chemical shielding. dict. e.g. {'Mg': -412.8, 'C': -200.5, 'O': -271.1} - efg (tuple[dict[str, float]]): Electric Field Gradient (EFG) tensor on each ion, e.g. + efg (tuple): Electric Field Gradient (EFG) tensor on each ion as a tuple of dict, e.g. ({"cq": 0.1, "eta", 0.2, "nuclear_quadrupole_moment": 0.3}, {"cq": 0.7, "eta", 0.8, "nuclear_quadrupole_moment": 0.9}, ...) - charge (tuple[dict[str, float]]): Charge on each ion, e.g. + charge (tuple): Charge on each ion as a tuple of dict, e.g. ({"p": 0.154, "s": 0.078, "d": 0.0, "tot": 0.232}, ...) is_stopped (bool): True if OUTCAR is from a stopped run (using STOPCAR, see VASP Manual). - run_stats (dict[str, float | None]): Various useful run stats including "System time (sec)", - "Total CPU time used (sec)", "Elapsed time (sec)", "Maximum memory used (kb)", - "Average memory used (kb)", "User time (sec)", "cores". - elastic_tensor (NDArray): Total elastic moduli (Kbar) is given in a 6x6 array matrix. - drift (NDArray): Total drift for each step in eV/Atom. + run_stats (dict): Various useful run stats as a dict including "System time (sec)", "Total CPU time used (sec)", + "Elapsed time (sec)", "Maximum memory used (kb)", "Average memory used (kb)", "User time (sec)", "cores". + elastic_tensor (np.array): Total elastic moduli (Kbar) is given in a 6x6 array matrix. + drift (np.array): Total drift for each step in eV/Atom. ngf (tuple): Dimensions for the Augmentation grid. - sampling_radii (NDArray): Size of the sampling radii in VASP for the test charges for the electrostatic + sampling_radii (np.array): Size of the sampling radii in VASP for the test charges for the electrostatic potential at each atom. Total array size is the number of elements present in the calculation. - electrostatic_potential (NDArray): Average electrostatic potential at each atomic position in order of + electrostatic_potential (np.array): Average electrostatic potential at each atomic position in order of the atoms in POSCAR. - final_energy_contribs (dict[str, float]): Individual contributions to the total final energy. + final_energy_contribs (dict): Individual contributions to the total final energy as a dictionary. Include contributions from keys, e.g.: {'DENC': -505778.5184347, 'EATOM': 15561.06492564, 'EBANDS': -804.53201231, 'EENTRO': -0.08932659, 'EXHF': 0.0, 'Ediel_sol': 0.0, 'PAW double counting': 664.6726974100002, 'PSCENC': 742.48691646, 'TEWEN': 489742.86847338, 'XCENC': -169.64189814} efermi (float): Fermi energy. - filename (PathLike): Filename. + filename (str): Filename. final_energy (float): Final energy after extrapolation of sigma back to 0, i.e. energy(sigma->0). final_energy_wo_entrp (float): Final energy before extrapolation of sigma, i.e. energy without entropy. final_fr_energy (float): Final "free energy", i.e. free energy TOTEN. has_onsite_density_matrices (bool): Whether onsite density matrices have been set. lcalcpol (bool): If LCALCPOL has been set. lepsilon (bool): If LEPSILON has been set. - nelect (float): The number of electrons in the calculation. - spin (bool): If spin-polarization is enabled via ISPIN. + nelect (float): Returns the number of electrons in the calculation. + spin (bool): If spin-polarization was enabled via ISPIN. total_mag (float): Total magnetization (in terms of the number of unpaired electrons). + One can then call a specific reader depending on the type of run being + performed. These are currently: read_igpar(), read_lepsilon() and + read_lcalcpol(), read_core_state_eign(), read_avg_core_pot(). + + See the documentation of those methods for more documentation. + Authors: Rickard Armiento, Shyue Ping Ong """ @@ -2043,27 +1995,22 @@ def __init__(self, filename: PathLike) -> None: Args: filename (PathLike): OUTCAR file to parse. """ - self.filename: str = str(filename) - self.is_stopped: bool = False + self.filename = filename + self.is_stopped = False # Assume a compilation with parallelization enabled. # Will be checked later. # If VASP is compiled in serial, the OUTCAR is written slightly differently. - serial_compilation: bool = False + serial_compilation = False - # Data from the end of OUTCAR + # data from end of OUTCAR charge = [] mag_x = [] mag_y = [] mag_z = [] header = [] run_stats: dict[str, float | None] = {} - total_mag: float | None = None - nelect: float | None = None - efermi: float | None = None - e_fr_energy: float | None = None - e_wo_entrp: float | None = None - e0: float | None = None + total_mag = nelect = efermi = e_fr_energy = e_wo_entrp = e0 = None time_patt = re.compile(r"\((sec|kb)\)") efermi_patt = re.compile(r"E-fermi\s*:\s*(\S+)") @@ -2113,8 +2060,7 @@ def __init__(self, filename: PathLike) -> None: e_wo_entrp = float(match[1]) if e0 is None and (match := e0_pattern.search(clean)): e0 = float(match[1]) - - if nelect is not None and total_mag is not None and efermi is not None and run_stats: + if all([nelect, total_mag is not None, efermi is not None, run_stats]): break # For single atom systems, VASP doesn't print a total line, so @@ -2176,12 +2122,12 @@ def __init__(self, filename: PathLike) -> None: for idx in range(len(mag_x)): mag.append({key: Magmom([mag_x[idx][key], mag_y[idx][key], mag_z[idx][key]]) for key in mag_x[0]}) else: - mag = mag_x # type:ignore[assignment] + mag = mag_x # Data from beginning of OUTCAR run_stats["cores"] = None with zopen(filename, mode="rt", encoding="utf-8") as file: - for line in file: # type:ignore[assignment] + for line in file: if "serial" in line: # Activate serial parallelization run_stats["cores"] = 1 @@ -2267,9 +2213,32 @@ def __init__(self, filename: PathLike) -> None: ) if self.data.get("ibrion", [[0]])[0][0] > 6: self.dfpt = True + self.fd = False + self.read_internal_strain_tensor() + elif self.data.get("ibrion", [[0]])[0][0] in [5, 6]: + self.dfpt = False + self.fd = True self.read_internal_strain_tensor() else: self.dfpt = False + self.fd = False + + # Check for variable cell calculation + self.read_pattern( + {"isif": r"ISIF\s+=\s+([\-\d]+)"}, + terminate_on_match=True, + postprocess=int, + ) + if self.data.get("isif", [[0]])[0][0] == 3: + self.varcell = True + else: + self.varcell = False + + # Check if LRPA is True + self.lrpa = False + self.read_pattern({"rpa": r"LRPA\s*=\s*T"}) + if self.data.get("rpa", False): + self.lrpa = True # Check if LEPSILON is True and read piezo data if so self.read_pattern({"epsilon": r"LEPSILON\s*=\s*T"}) @@ -2282,6 +2251,21 @@ def __init__(self, filename: PathLike) -> None: else: self.lepsilon = False + # Read total elastic tensor if FD (ionic) and variable cell (electronic) + if self.fd and self.varcell: + self.read_elastic_tensor() + # Check if LCALCEPS is True and read piezo data if so + self.read_pattern({"calceps": r"LCALCEPS\s*=\s*T"}) + if self.data.get("calceps", False): + self.lcalceps = True + # self.read_lcalceps() + self.read_lepsilon() + # Only read ionic contribution if DFPT is turned on + if self.fd: + self.read_lepsilon_ionic() + else: + self.lcalceps = False + # Check if LCALCPOL is True and read polarization data if so self.read_pattern({"calcpol": r"LCALCPOL\s*=\s*T"}) if self.data.get("calcpol", False): @@ -2293,7 +2277,7 @@ def __init__(self, filename: PathLike) -> None: # Read electrostatic potential self.electrostatic_potential: list[float] | None = None - self.ngf: list[int] | None = None + self.ngf = None self.sampling_radii: list[float] | None = None self.read_pattern({"electrostatic": r"average \(electrostatic\) potential at core"}) if self.data.get("electrostatic", False): @@ -2301,7 +2285,7 @@ def __init__(self, filename: PathLike) -> None: self.read_pattern({"nmr_cs": r"LCHIMAG\s*=\s*(T)"}) if self.data.get("nmr_cs"): - self.nmr_cs: bool = True + self.nmr_cs = True self.read_chemical_shielding() self.read_cs_g0_contribution() self.read_cs_core_contribution() @@ -2311,7 +2295,7 @@ def __init__(self, filename: PathLike) -> None: self.read_pattern({"nmr_efg": r"NMR quadrupolar parameters"}) if self.data.get("nmr_efg"): - self.nmr_efg: bool = True + self.nmr_efg = True self.read_nmr_efg() self.read_nmr_efg_tensor() else: @@ -2322,7 +2306,7 @@ def __init__(self, filename: PathLike) -> None: terminate_on_match=True, ) if "has_onsite_density_matrices" in self.data: - self.has_onsite_density_matrices: bool = True + self.has_onsite_density_matrices = True self.read_onsite_density_matrices() else: self.has_onsite_density_matrices = False @@ -2350,90 +2334,6 @@ def __init__(self, filename: PathLike) -> None: final_energy_contribs[key] = sum(map(float, self.data[key][-1])) self.final_energy_contribs = final_energy_contribs - @staticmethod - def _parse_sci_notation(line: str) -> list[float]: - """ - Parse lines with values in scientific notation and potentially - without spaces in between the values. This assumes that the scientific - notation always lists two digits for the exponent, e.g. 3.535E-02. - - Args: - line: line to parse. - - Returns: - list[float]: numbers if found, empty list if not. - """ - if match := re.findall(r"[\.\-\d]+E[\+\-]\d{2}", line): - return [float(t) for t in match] - return [] - - def as_dict(self) -> dict[str, Any]: - """MSONable dict.""" - dct = { - "@module": type(self).__module__, - "@class": type(self).__name__, - "efermi": self.efermi, - "run_stats": self.run_stats, - "magnetization": self.magnetization, - "charge": self.charge, - "total_magnetization": self.total_mag, - "nelect": self.nelect, - "is_stopped": self.is_stopped, - "drift": self.drift, - "ngf": self.ngf, - "sampling_radii": self.sampling_radii, - "electrostatic_potential": self.electrostatic_potential, - } - - if self.lepsilon: - dct |= { - "piezo_tensor": self.piezo_tensor, - "dielectric_tensor": self.dielectric_tensor, - "born": self.born, - } - - if self.dfpt: - dct["internal_strain_tensor"] = self.internal_strain_tensor - - if self.dfpt and self.lepsilon: - dct |= { - "piezo_ionic_tensor": self.piezo_ionic_tensor, - "dielectric_ionic_tensor": self.dielectric_ionic_tensor, - } - - if self.lcalcpol: - dct |= {"p_elec": self.p_elec, "p_ion": self.p_ion} - if self.spin and not self.noncollinear: - dct |= {"p_sp1": self.p_sp1, "p_sp2": self.p_sp2} - dct["zval_dict"] = self.zval_dict - - if self.nmr_cs: - dct.update( - nmr_cs={ - "valence and core": self.data["chemical_shielding"]["valence_and_core"], - "valence_only": self.data["chemical_shielding"]["valence_only"], - "g0": self.data["cs_g0_contribution"], - "core": self.data["cs_core_contribution"], - "raw": self.data["unsym_cs_tensor"], - } - ) - - if self.nmr_efg: - dct.update( - nmr_efg={ - "raw": self.data["unsym_efg_tensor"], - "parameters": self.data["efg"], - } - ) - - if self.has_onsite_density_matrices: - # Cast Spin to str for consistency with electronic_structure - # TODO: improve handling of Enum (de)serialization in monty - onsite_density_matrices = [{str(k): v for k, v in d.items()} for d in self.data["onsite_density_matrices"]] - dct["onsite_density_matrices"] = onsite_density_matrices - - return dct - def read_pattern( self, patterns: dict[str, str], @@ -2446,32 +2346,32 @@ def read_pattern( arguments. Args: - patterns (dict[str, str]): Patterns, e.g. + patterns (dict): A dict of patterns, e.g. {"energy": r"energy\\(sigma->0\\)\\s+=\\s+([\\d\\-.]+)"}. reverse (bool): Read files in reverse. Defaults to false. Useful for - large files like OUTCARs, especially when used with + large files, esp OUTCARs, especially when used with terminate_on_match. terminate_on_match (bool): Whether to terminate when there is at - least one match for each key in patterns. + least one match in each key in pattern. postprocess (Callable): A post processing function to convert all matches. Defaults to str, i.e., no change. - Renders accessible from self.data: + Renders accessible: Any attribute in patterns. For example, {"energy": r"energy\\(sigma->0\\)\\s+=\\s+([\\d\\-.]+)"} will set the value of self.data["energy"] = [[-1234], [-3453], ...], to the - results from regex and postprocess. Note that the values - are list[list], because you can grep multiple items on one line. + results from regex and postprocess. Note that the returned values + are lists of lists, because you can grep multiple items on one line. """ matches = regrep( - filename=self.filename, - patterns=patterns, + self.filename, + patterns, reverse=reverse, terminate_on_match=terminate_on_match, postprocess=postprocess, ) - for key in patterns: - self.data[key] = [i[0] for i in matches.get(key, [])] + for k in patterns: + self.data[k] = [i[0] for i in matches.get(k, [])] def read_table_pattern( self, @@ -2514,25 +2414,27 @@ def read_table_pattern( Incompatible with last_one_only. Returns: - List of tables or a single table if last_one_only/first_one_only is True. - 1) A table is a list of rows. 2) A row is either a list of attribute - values in case the capturing group is defined without name in row_pattern, - or a dict in case that named capturing groups are defined by row_pattern. + List of tables. 1) A table is a list of rows. 2) A row if either a list of + attribute values in case the capturing group is defined without name in + row_pattern, or a dict in case that named capturing groups are defined by + row_pattern. """ if last_one_only and first_one_only: raise ValueError("last_one_only and first_one_only options are incompatible") - + # print("!!!!!I'M IN read_table_pattern()!!!!!") #TEST with zopen(self.filename, mode="rt", encoding="utf-8") as file: - text: str = file.read() # type:ignore[assignment] + text = file.read() + table_pattern_text = header_pattern + r"\s*^(?P(?:\s+" + row_pattern + r")+)\s+" + footer_pattern table_pattern = re.compile(table_pattern_text, re.MULTILINE | re.DOTALL) rp = re.compile(row_pattern) - - TableData: TypeAlias = list[list[Any] | dict[str, Any]] - tables: list[TableData] = [] + # print(f"....table_pattern: {table_pattern}!!!!!") #TEST + # print(f"rp: {rp}!!!!!") #TEST + tables: list[list] = [] for mt in table_pattern.finditer(text): table_body_text = mt.group("table_body") - table_contents: TableData = [] + # print(f".....table_body_text is: {table_body_text}") #TEST + table_contents = [] for line in table_body_text.split("\n"): ml = rp.search(line) # Skip empty lines @@ -2540,26 +2442,24 @@ def read_table_pattern( continue d = ml.groupdict() if len(d) > 0: - processed_line: list[Any] | dict[str, Any] = {k: postprocess(v) for k, v in d.items()} + processed_line: dict | list = {k: postprocess(v) for k, v in d.items()} else: processed_line = [postprocess(v) for v in ml.groups()] table_contents.append(processed_line) tables.append(table_contents) + # print("....APPENDED TABLE!!!!!") #TEST if first_one_only: break - retained_data: list[TableData] | TableData = tables[-1] if last_one_only or first_one_only else tables + # print(f".....length of tables is: {len(tables)}") #TEST + # print(f".....attribute name is: {attribute_name}") #TEST + retained_data: list = tables[-1] if last_one_only or first_one_only else tables + # print("....RETAINED DATA!!!!!") #TEST if attribute_name is not None: self.data[attribute_name] = retained_data return retained_data def read_electrostatic_potential(self) -> None: - """Parse the eletrostatic potential for the last ionic step. - - Renders accessible as attributes: - ngf (list[int, int, int]): Number of grid points along x, y, z dimensions. - sampling_radii (list[float, float, float]): Test charge radii. - electrostatic_potential (list[float]): The eletrostatic potential. - """ + """Parse the eletrostatic potential for the last ionic step.""" pattern = {"ngf": r"\s+dimension x,y,z NGXF=\s+([\.\-\d]+)\sNGYF=\s+([\.\-\d]+)\sNGZF=\s+([\.\-\d]+)"} self.read_pattern(pattern, postprocess=int) self.ngf = self.data.get("ngf", [[]])[0] @@ -2572,21 +2472,35 @@ def read_electrostatic_potential(self) -> None: table_pattern = r"((?:\s+\d+\s*[\.\-\d]+)+)" footer_pattern = r"\s+E-fermi :" - pot_patterns: list = self.read_table_pattern(header_pattern, table_pattern, footer_pattern) - pot_patterns_str: str = "".join(itertools.chain.from_iterable(pot_patterns)) - pots: list = re.findall(r"\s+\d+\s*([\.\-\d]+)+", pot_patterns_str) + pots: list = self.read_table_pattern(header_pattern, table_pattern, footer_pattern) + _pots: str = "".join(itertools.chain.from_iterable(pots)) + + pots = re.findall(r"\s+\d+\s*([\.\-\d]+)+", _pots) self.electrostatic_potential = [*map(float, pots)] - def read_freq_dielectric(self) -> None: + @staticmethod + def _parse_sci_notation(line: str) -> list[float]: + """ + Parse lines with values in scientific notation and potentially + without spaces in between the values. This assumes that the scientific + notation always lists two digits for the exponent, e.g. 3.535E-02. + + Args: + line: line to parse. + + Returns: + list[float]: numbers if found, empty if not. """ - Parse the frequency dependent dielectric function (obtained with LOPTICS). + if match := re.findall(r"[\.\-\d]+E[\+\-]\d{2}", line): + return [float(t) for t in match] + return [] - Renders accessible as attributes: - plasma_frequencies (dict[Literal["intraband", "interband"], NDArray[float64]]): - plasma frequency in eV. - dielectric_energies (NDArray[float64]): Dielectric energies. - dielectric_tensor_function (NDArray[complex128]): Dielectric tensor function. + def read_freq_dielectric(self) -> None: + """ + Parse the frequency dependent dielectric function (obtained with + LOPTICS). Frequencies (in eV) are in self.frequencies, and dielectric + tensor function is given as self.dielectric_tensor_function. """ plasma_pattern = r"plasma frequency squared.*" dielectric_pattern = ( @@ -2603,8 +2517,7 @@ def read_freq_dielectric(self) -> None: count = 0 component = "IMAGINARY" with zopen(self.filename, mode="rt", encoding="utf-8") as file: - line: str - for line in file: # type:ignore[assignment] + for line in file: line = line.strip() if re.match(plasma_pattern, line): read_plasma = "intraband" if "intraband" in line else "interband" @@ -2622,7 +2535,7 @@ def read_freq_dielectric(self) -> None: if re.match(row_pattern, line.strip()): tokens = line.strip().split() elif type(self)._parse_sci_notation(line.strip()): - tokens = type(self)._parse_sci_notation(line.strip()) # type:ignore[assignment] + tokens = type(self)._parse_sci_notation(line.strip()) elif re.match(r"\s*-+\s*", line): count += 1 @@ -2638,21 +2551,16 @@ def read_freq_dielectric(self) -> None: elif count == 3: break - self.plasma_frequencies: dict[Any, NDArray[np.float64]] = { - k: np.array(v[:3]) for k, v in plasma_frequencies.items() - } - self.dielectric_energies: NDArray[np.float64] = np.array(energies) - self.dielectric_tensor_function: NDArray[np.complex128] = np.array(data["REAL"]) + 1j * np.array( - data["IMAGINARY"] - ) + self.plasma_frequencies = {k: np.array(v[:3]) for k, v in plasma_frequencies.items()} + self.dielectric_energies = np.array(energies) + self.dielectric_tensor_function = np.array(data["REAL"]) + 1j * np.array(data["IMAGINARY"]) def read_chemical_shielding(self) -> None: """Parse the NMR chemical shieldings data. Only the second part "absolute, valence and core" will be parsed. And only the three right most field (ISO_SHIELDING, SPAN, SKEW) will be retrieved. - Renders accessible from self.data: - chemical_shielding (dict[Literal["valence_only", "valence_and_core"], list[list[float]]]): - Chemical shieldings in the order of atoms from the OUTCAR. Maryland notation is adopted. + Set self.data["chemical_shielding"] as: + List of chemical shieldings in the order of atoms from the OUTCAR. Maryland notation is adopted. """ header_pattern = ( r"\s+CSA tensor \(J\. Mason, Solid State Nucl\. Magn\. Reson\. 2, " # codespell:ignore reson @@ -2668,24 +2576,23 @@ def read_chemical_shielding(self) -> None: row_pattern = r"\d+(?:\s+[-]?\d+\.\d+){3}\s+" + r"\s+".join([r"([-]?\d+\.\d+)"] * 3) footer_pattern = r"-{50,}\s*$" h1 = header_pattern + first_part_pattern - cs_valence_only: list[list[float]] = self.read_table_pattern( + cs_valence_only = self.read_table_pattern( h1, row_pattern, footer_pattern, postprocess=float, last_one_only=True ) h2 = header_pattern + swallon_valence_body_pattern - cs_valence_and_core: list[list[float]] = self.read_table_pattern( + cs_valence_and_core = self.read_table_pattern( h2, row_pattern, footer_pattern, postprocess=float, last_one_only=True ) - chemical_shielding: dict[Literal["valence_only", "valence_and_core"], list[list[float]]] = { + self.data["chemical_shielding"] = { "valence_only": cs_valence_only, "valence_and_core": cs_valence_and_core, } - self.data["chemical_shielding"] = chemical_shielding def read_cs_g0_contribution(self) -> None: """Parse the G0 contribution of NMR chemical shielding. - Renders accessible from self.data: - cs_g0_contribution (list[list[float]]): G0 contribution matrix. + Set self.data["cs_g0_contribution"] as: + list[list]: G0 contribution matrix. """ header_pattern = ( r"^\s+G\=0 CONTRIBUTION TO CHEMICAL SHIFT \(field along BDIR\)\s+$\n" @@ -2707,8 +2614,8 @@ def read_cs_g0_contribution(self) -> None: def read_cs_core_contribution(self) -> None: """Parse the core contribution of NMR chemical shielding. - Renders accessible from self.data: - cs_core_contribution (dict[str, float]): Core contribution from each element. + Set self.data["cs_core_contribution"] as: + list[list]: G0 contribution matrix. """ header_pattern = r"^\s+Core NMR properties\s*$\n\n^\s+typ\s+El\s+Core shift \(ppm\)\s*$\n^\s+-{20,}$\n" row_pattern = r"\d+\s+(?P[A-Z][a-z]?\w?)\s+(?P[-]?\d+\.\d+)" @@ -2721,14 +2628,14 @@ def read_cs_core_contribution(self) -> None: last_one_only=True, attribute_name="cs_core_contribution", ) - core_contrib: dict[str, float] = {d["element"]: float(d["shift"]) for d in self.data["cs_core_contribution"]} + core_contrib = {d["element"]: float(d["shift"]) for d in self.data["cs_core_contribution"]} self.data["cs_core_contribution"] = core_contrib def read_cs_raw_symmetrized_tensors(self) -> None: """Parse the matrix form of NMR tensor before corrected to table. - Renders accessible from self.data: - unsym_cs_tensor (list[list[list[float]]]): Unsymmetrized tensors in the order of atoms. + Returns: + nsymmetrized tensors list in the order of atoms. """ header_pattern = r"\s+-{50,}\s+\s+Absolute Chemical Shift tensors\s+\s+-{50,}$" first_part_pattern = r"\s+UNSYMMETRIZED TENSORS\s+$" @@ -2736,7 +2643,7 @@ def read_cs_raw_symmetrized_tensors(self) -> None: unsym_footer_pattern = r"^\s+SYMMETRIZED TENSORS\s+$" with zopen(self.filename, mode="rt", encoding="utf-8") as file: - text: str = file.read() # type:ignore[assignment] + text = file.read() unsym_table_pattern_text = header_pattern + first_part_pattern + r"(?P.+)" + unsym_footer_pattern table_pattern = re.compile(unsym_table_pattern_text, re.MULTILINE | re.DOTALL) row_pat = re.compile(row_pattern) @@ -2746,30 +2653,26 @@ def read_cs_raw_symmetrized_tensors(self) -> None: micro_header_pattern = r"ion\s+\d+" micro_table_pattern_text = micro_header_pattern + r"\s*^(?P(?:\s*" + row_pattern + r")+)\s+" micro_table_pattern = re.compile(micro_table_pattern_text, re.MULTILINE | re.DOTALL) - unsym_tensors: list[list[list[float]]] = [] + unsym_tensors = [] for mt in micro_table_pattern.finditer(table_text): table_body_text = mt.group("table_body") - tensor_matrix: list[list[float]] = [] + tensor_matrix = [] for line in table_body_text.rstrip().split("\n"): ml = row_pat.search(line) if ml is None: raise RuntimeError(f"failure to find pattern, {ml=}") - processed_line: list[float] = [float(v) for v in ml.groups()] + processed_line = [float(v) for v in ml.groups()] tensor_matrix.append(processed_line) unsym_tensors.append(tensor_matrix) self.data["unsym_cs_tensor"] = unsym_tensors else: raise ValueError("NMR UNSYMMETRIZED TENSORS is not found") - def read_nmr_efg_tensor(self) -> list[NDArray[np.float64]]: + def read_nmr_efg_tensor(self) -> list[NDArray]: """Parses the NMR Electric Field Gradient Raw Tensors. Returns: - list[NDArray[float64]]: Electric Field Gradient Tensors in the order of atoms. - - Renders accessible from self.data: - unsym_efg_tensor (list[NDArray[float64]]): Electric Field Gradient - Tensors in the order of atoms. + A list of Electric Field Gradient Tensors in the order of Atoms from OUTCAR. """ header_pattern = ( r"Electric field gradients \(V/A\^2\)\n-*\n ion\s+V_xx\s+V_yy\s+V_zz\s+V_xy\s+V_xz\s+V_yz\n-*\n" @@ -2779,17 +2682,16 @@ def read_nmr_efg_tensor(self) -> list[NDArray[np.float64]]: footer_pattern = r"-*\n" data = self.read_table_pattern(header_pattern, row_pattern, footer_pattern, postprocess=float) - tensors: list[NDArray[np.float64]] = [make_symmetric_matrix_from_upper_tri(d) for d in data] + tensors = [make_symmetric_matrix_from_upper_tri(d) for d in data] self.data["unsym_efg_tensor"] = tensors return tensors def read_nmr_efg(self) -> None: """Parse the NMR Electric Field Gradient interpreted values. - Renders accessible from self.data: - efg (list[dict[Literal["cq", "eta", "nuclear_quadrupole_moment"], float]]): - Electric Field Gradient tensors in the order of atoms. - Each dict key/value pair corresponds to a component of the tensors. + Set self.data["efg"] as: + Electric Field Gradient tensors as a list of dict in the order of atoms from OUTCAR. + Each dict key/value pair corresponds to a component of the tensors. """ header_pattern = ( r"^\s+NMR quadrupolar parameters\s+$\n" @@ -2817,37 +2719,28 @@ def read_elastic_tensor(self) -> None: """ Parse the elastic tensor data. - Renders accessible from self.data: - elastic_tensor[list[list[float]]]: 6x6 array corresponding to the elastic tensor. + Set self.data["elastic_tensor"] as: + 6x6 array corresponding to the elastic tensor from the OUTCAR. """ - header_pattern = r"TOTAL ELASTIC MODULI \(kBar\)\s+Direction\s+([X-Z][X-Z]\s+)+\-+" + header_pattern = r"TOTAL ELASTIC MODULI \(kBar\)\s*\n\s*Direction\s+XX\s+YY\s+ZZ\s+XY\s+YZ\s+ZX\s*\n\s*-+" row_pattern = r"[X-Z][X-Z]\s+" + r"\s+".join([r"(\-*[\.\d]+)"] * 6) footer_pattern = r"\-+" - et_table: list[list[float]] = self.read_table_pattern( - header_pattern, row_pattern, footer_pattern, postprocess=float - ) + et_table = self.read_table_pattern(header_pattern, row_pattern, footer_pattern, postprocess=float) self.data["elastic_tensor"] = et_table def read_piezo_tensor(self) -> None: - """Parse the piezo tensor data. - - Renders accessible from self.data: - piezo_tensor (list[list[float]]): The piezo tensor. - """ + """Parse the piezo tensor data.""" header_pattern = r"PIEZOELECTRIC TENSOR for field in x, y, z\s+\(C/m\^2\)\s+([X-Z][X-Z]\s+)+\-+" row_pattern = r"[x-z]\s+" + r"\s+".join([r"(\-*[\.\d]+)"] * 6) footer_pattern = r"BORN EFFECTIVE" - piezo_tensor: list[list[float]] = self.read_table_pattern( - header_pattern, row_pattern, footer_pattern, postprocess=float - ) - self.data["piezo_tensor"] = piezo_tensor + pt_table = self.read_table_pattern(header_pattern, row_pattern, footer_pattern, postprocess=float) + self.data["piezo_tensor"] = pt_table def read_onsite_density_matrices(self) -> None: """Parse the onsite density matrices. - Renders accessible from self.data: - onsite_density_matrices (list[dict[Spin, list[list[float]]]]): - Onsite density matrices with index corresponding to atom index in Structure. + Set self.data["onsite_density_matrices"] as: + List with index corresponding to atom index in Structure. """ # Matrix size will vary depending on if d or f orbitals are present. # Therefore regex assumes f, but filter out None values if d. @@ -2879,24 +2772,21 @@ def read_onsite_density_matrices(self) -> None: spin2_component = [[[e for e in row if e is not None] for row in matrix] for matrix in spin2_component] - onsite_density_matrices: list[dict[Spin, list[list[float]]]] = [ + self.data["onsite_density_matrices"] = [ {Spin.up: spin1_component[idx], Spin.down: spin2_component[idx]} for idx in range(len(spin1_component)) ] - self.data["onsite_density_matrices"] = onsite_density_matrices def read_corrections( self, reverse: bool = True, terminate_on_match: bool = True, ) -> None: - """Read the dipol qudropol correction. + """Read the dipol qudropol corrections into + self.data["dipol_quadrupol_correction"]. Args: reverse (bool): Whether to start from end of OUTCAR. Defaults to True. terminate_on_match (bool): Whether to terminate once match is found. Defaults to True. - - Renders accessible from self.data: - dipol_quadrupol_correction (float): Dipol qudropol correction. """ patterns = {"dipol_quadrupol_correction": r"dipol\+quadrupol energy correction\s+([\d\-\.]+)"} self.read_pattern( @@ -2905,8 +2795,7 @@ def read_corrections( terminate_on_match=terminate_on_match, postprocess=float, ) - dipol_quadrupol_correction: float = self.data["dipol_quadrupol_correction"][0][0] - self.data["dipol_quadrupol_correction"] = dipol_quadrupol_correction + self.data["dipol_quadrupol_correction"] = self.data["dipol_quadrupol_correction"][0][0] def read_neb( self, @@ -2920,15 +2809,17 @@ def read_neb( Args: reverse (bool): Read files in reverse. Defaults to false. Useful for - large files, especially when used with terminate_on_match. - Defaults to True here since we usually want only the final value. + large files, esp OUTCARs, especially when used with + terminate_on_match. Defaults to True here since we usually + want only the final value. terminate_on_match (bool): Whether to terminate when there is at least one match in each key in pattern. Defaults to True here since we usually want only the final value. - Renders accessible from self.data: - energy (float): Final energy. - tangent_force (float): Final tangent force. + Renders accessible: + tangent_force - Final tangent force. + energy - Final energy. + These can be accessed under Outcar.data[key] """ patterns = { "energy": r"energy\(sigma->0\)\s+=\s+([\d\-\.]+)", @@ -2951,19 +2842,19 @@ def read_igpar(self) -> None: See VASP sections "LBERRY, IGPAR, NPPSTR, DIPOL" for info on what these are. - Renders accessible as attributes: - er_ev (dict[Spin, NDArray[float]]): e_ev. - er_bp (dict[Spin, NDArray[float]]): e_bp. - er_ev_tot (NDArray[float]): spin up + spin down summed. - er_bp_tot (NDArray[float]): spin up + spin down summed. - p_elec (int): spin up + spin down summed. - p_ion (int): spin up + spin down summed. + Renders accessible: + er_ev = e_ev (dictionary with Spin.up/Spin.down as keys) + er_bp = e_bp (dictionary with Spin.up/Spin.down as keys) + er_ev_tot = spin up + spin down summed + er_bp_tot = spin up + spin down summed + p_elc = spin up + spin down summed + p_ion = spin up + spin down summed. """ # Variables to be filled - self.er_ev: dict[Spin, NDArray[np.float64]] = {} # array(3*float) - self.er_bp: dict[Spin, NDArray[np.float64]] = {} # array(3*float) - self.er_ev_tot: NDArray | None = None # array(3*float) - self.er_bp_tot: NDArray | None = None # array(3*float) + self.er_ev = {} # dict (Spin.up/down) of array(3*float) + self.er_bp = {} # dict (Spin.up/down) of array(3*float) + self.er_ev_tot = None # array(3*float) + self.er_bp_tot = None # array(3*float) self.p_elec: int | None = None self.p_ion: int | None = None try: @@ -3042,27 +2933,26 @@ def p_ion(results, match): search.append([ionic_dipole_moment_pattern, None, p_ion]) self.context = None - self.er_ev = {Spin.up: None, Spin.down: None} # type:ignore[dict-item] - self.er_bp = {Spin.up: None, Spin.down: None} # type:ignore[dict-item] + self.er_ev = {Spin.up: None, Spin.down: None} + self.er_bp = {Spin.up: None, Spin.down: None} - micro_pyawk(self.filename, search, self) # type:ignore[arg-type] + micro_pyawk(self.filename, search, self) if self.er_ev[Spin.up] is not None and self.er_ev[Spin.down] is not None: - self.er_ev_tot = self.er_ev[Spin.up] + self.er_ev[Spin.down] # type: ignore[operator,assignment] + self.er_ev_tot = self.er_ev[Spin.up] + self.er_ev[Spin.down] # type: ignore[operator] if self.er_bp[Spin.up] is not None and self.er_bp[Spin.down] is not None: - self.er_bp_tot = self.er_bp[Spin.up] + self.er_bp[Spin.down] # type: ignore[operator,assignment] + self.er_bp_tot = self.er_bp[Spin.up] + self.er_bp[Spin.down] # type: ignore[operator] except Exception as exc: raise RuntimeError("IGPAR OUTCAR could not be parsed.") from exc - def read_internal_strain_tensor(self) -> None: - """Read the internal strain tensor. - - Renders accessible as attributes: - internal_strain_tensor (list[NDArray[float64]]): Voigt notation tensors for each site. + def read_internal_strain_tensor(self): + """Read the internal strain tensor and populates + self.internal_strain_tensor with an array of voigt notation + tensors for each site. """ - search: list[list] = [] + search = [] def internal_strain_start(results, match: str) -> None: results.internal_strain_ion = int(match[1]) - 1 @@ -3100,15 +2990,13 @@ def internal_strain_data(results, match: str) -> None: ) self.internal_strain_ion = None - self.internal_strain_tensor: list[NDArray[np.float64]] = [] - micro_pyawk(self.filename, search, self) # type:ignore[arg-type] + self.internal_strain_tensor = [] + micro_pyawk(self.filename, search, self) def read_lepsilon(self) -> None: """Read a LEPSILON run. - Renders accessible as attributes: - dielectric_tensor (list[list[float]]): Dielectric tensor. - piezo_tensor (list[list[float]]): The piezo tensor. + TODO: Document the actual variables. """ try: search = [] @@ -3170,13 +3058,17 @@ def dielectric_section_stop(results, match): def piezo_section_start(results, _match): results.piezo_index = 0 - search.append( - [ - r"PIEZOELECTRIC TENSOR for field in x, y, z \(C/m\^2\)", - None, - piezo_section_start, - ] - ) + if not self.lrpa: + search.append( + [ + ( + r"PIEZOELECTRIC TENSOR \(including local field effects\)" + r"(?:\s*for\s*field\s*in\s*x,\s*y,\s*z\s*)? \(C/m\^2\)" + ), + None, + piezo_section_start, + ] + ) def piezo_data(results, match): results.piezo_tensor[results.piezo_index, :] = np.array([float(match[i]) for i in range(1, 7)]) @@ -3249,14 +3141,14 @@ def born_section_stop(results, _match): ) self.born_ion = None - self.born: list | NDArray = [] + self.born: list | np.ndarray = [] - micro_pyawk(self.filename, search, self) # type:ignore[arg-type] + micro_pyawk(self.filename, search, self) self.born = np.array(self.born) - self.dielectric_tensor = self.dielectric_tensor.tolist() # type:ignore[assignment] - self.piezo_tensor = self.piezo_tensor.tolist() # type:ignore[assignment] + self.dielectric_tensor = self.dielectric_tensor.tolist() + self.piezo_tensor = self.piezo_tensor.tolist() except Exception as exc: raise RuntimeError("LEPSILON OUTCAR could not be parsed.") from exc @@ -3264,9 +3156,7 @@ def born_section_stop(results, _match): def read_lepsilon_ionic(self) -> None: """Read the ionic component of a LEPSILON run. - Renders accessible as attributes: - dielectric_ionic_tensor (list[list[float]]): Ionic dielectric tensor. - piezo_ionic_tensor (list[list[float]]): Ionic piezoelectric tensor. + TODO: Document the actual variables. """ try: search = [] @@ -3381,10 +3271,10 @@ def piezo_section_stop(results, _match): self.piezo_ionic_index = None self.piezo_ionic_tensor = np.zeros((3, 6)) - micro_pyawk(self.filename, search, self) # type:ignore[arg-type] + micro_pyawk(self.filename, search, self) - self.dielectric_ionic_tensor = self.dielectric_ionic_tensor.tolist() # type:ignore[assignment] - self.piezo_ionic_tensor = self.piezo_ionic_tensor.tolist() # type:ignore[assignment] + self.dielectric_ionic_tensor = self.dielectric_ionic_tensor.tolist() + self.piezo_ionic_tensor = self.piezo_ionic_tensor.tolist() except Exception as exc: raise RuntimeError("ionic part of LEPSILON OUTCAR could not be parsed.") from exc @@ -3392,16 +3282,12 @@ def piezo_section_stop(results, _match): def read_lcalcpol(self) -> None: """Read the LCALCPOL. - Renders accessible as attributes: - p_elec (NDArray[float64]): Total electronic dipole moment. - p_ion (NDArray[float64]): Ionic dipole moment. - p_sp1 (NDArray[float] | None): Spin up. - p_sp2 (NDArray[float] | None): Spin down. + TODO: Document the actual variables. """ self.p_elec = None - self.p_ion = None self.p_sp1: int | None = None self.p_sp2: int | None = None + self.p_ion = None try: search = [] @@ -3481,12 +3367,12 @@ def p_ion(results, match): ] ) - micro_pyawk(self.filename, search, self) # type:ignore[arg-type] + micro_pyawk(self.filename, search, self) # Fix polarization units in new versions of VASP regex = r"^.*Ionic dipole moment: .*" search = [[regex, None, lambda x, y: x.append(y.group(0))]] - results = micro_pyawk(self.filename, search, []) # type:ignore[arg-type] + results = micro_pyawk(self.filename, search, []) if "|e|" in results[0]: self.p_elec *= -1 # type: ignore[operator] @@ -3499,11 +3385,7 @@ def p_ion(results, match): raise RuntimeError("LCALCPOL OUTCAR could not be parsed.") from exc def read_pseudo_zval(self) -> None: - """Create a pseudopotential valence electron number (ZVAL) dictionary. - - Renders accessible as attributes: - zval_dict (dict[str, float]): ZVAL for each element. - """ + """Create a pseudopotential ZVAL dictionary.""" try: def atom_symbols(results, match): @@ -3524,9 +3406,9 @@ def zvals(results, match): ) ) - micro_pyawk(self.filename, search, self) # type:ignore[arg-type] + micro_pyawk(self.filename, search, self) - self.zval_dict: dict[str, float] = dict(zip(self.atom_symbols, self.zvals, strict=True)) # type: ignore[attr-defined] + self.zval_dict = dict(zip(self.atom_symbols, self.zvals, strict=True)) # type: ignore[attr-defined] # Clean up del self.atom_symbols # type: ignore[attr-defined] @@ -3535,32 +3417,33 @@ def zvals(results, match): except Exception as exc: raise RuntimeError("ZVAL dict could not be parsed.") from exc - def read_core_state_eigen(self) -> list[dict[str, list[float]]]: + def read_core_state_eigen(self) -> list[dict]: """Read the core state eigenenergies at each ionic step. Returns: - list[dict[str, list[float]]]: The atom such as [{"AO": [core_state_eig, ]}, ]. - The core state eigenenergie list for each AO is over all ionic step. + A list of dict over the atom such as [{"AO":[core state eig]}]. + The core state eigenenergie list for each AO is over all ionic + step. Example: The core state eigenenergie of the 2s AO of the 6th atom of the structure at the last ionic step is [5]["2s"][-1]. """ with zopen(self.filename, mode="rt", encoding="utf-8") as foutcar: - line: str = foutcar.readline() # type:ignore[assignment] - core_state_eigs: list[dict[str, list[float]]] = [] + line = foutcar.readline() + cl: list[dict] = [] while line != "": - line = foutcar.readline() # type:ignore[assignment] + line = foutcar.readline() if "NIONS =" in line: natom = int(line.split("NIONS =")[1]) - core_state_eigs = [defaultdict(list) for _ in range(natom)] + cl = [defaultdict(list) for _ in range(natom)] if "the core state eigen" in line: iat = -1 while line != "": - line = foutcar.readline() # type:ignore[assignment] + line = foutcar.readline() # don't know number of lines to parse without knowing # specific species, so stop parsing when we reach # "E-fermi" instead @@ -3574,34 +3457,34 @@ def read_core_state_eigen(self) -> list[dict[str, list[float]]]: iat += 1 # started parsing a new ion data = data[1:] # remove element with ion number for i in range(0, len(data), 2): - core_state_eigs[iat][data[i]].append(float(data[i + 1])) - return core_state_eigs + cl[iat][data[i]].append(float(data[i + 1])) + return cl - def read_avg_core_poten(self) -> list[list[float]]: + def read_avg_core_poten(self) -> list[list]: """Read the core potential at each ionic step. Returns: - list[list[float]]: The average core potentials for each atom of each ionic - step as: [[avg_core_pot, ], ]. + A list for each ionic step containing a list of the average core + potentials for each atom: [[avg core pot]]. Example: The average core potential of the 2nd atom of the structure at the - last ionic step is: [-1][1]. + last ionic step is: [-1][1] """ with zopen(self.filename, mode="rt", encoding="utf-8") as foutcar: line = foutcar.readline() - avg_core_pots: list[list[float]] = [] + aps: list[list[float]] = [] while line != "": line = foutcar.readline() if "the norm of the test charge is" in line: - avg_pot: list[float] = [] + ap: list[float] = [] while line != "": line = foutcar.readline() # don't know number of lines to parse without knowing # specific species, so stop parsing when we reach # "E-fermi" instead if "E-fermi" in line: - avg_core_pots.append(avg_pot) + aps.append(ap) break # the average core potentials of up to 5 elements are @@ -3611,30 +3494,107 @@ def read_avg_core_poten(self) -> list[list[float]]: npots = int((len(line) - 1) / 17) for i in range(npots): start = i * 17 - avg_pot.append(float(line[start + 8 : start + 17])) + ap.append(float(line[start + 8 : start + 17])) + + return aps + + def as_dict(self) -> dict: + """MSONable dict.""" + dct = { + "@module": type(self).__module__, + "@class": type(self).__name__, + "efermi": self.efermi, + "run_stats": self.run_stats, + "magnetization": self.magnetization, + "charge": self.charge, + "total_magnetization": self.total_mag, + "nelect": self.nelect, + "is_stopped": self.is_stopped, + "drift": self.drift, + "ngf": self.ngf, + "sampling_radii": self.sampling_radii, + "electrostatic_potential": self.electrostatic_potential, + } + + if self.lepsilon: + dct |= { + "piezo_tensor": self.piezo_tensor, + "dielectric_tensor": self.dielectric_tensor, + "born": self.born, + } + + if self.lcalceps: + dct |= { + "piezo_tensor": self.piezo_tensor, + "dielectric_tensor": self.dielectric_tensor, + "born": self.born, + } + if self.fd and self.lcalceps: + dct |= { + "piezo_ionic_tensor": self.piezo_ionic_tensor, + "dielectric_ionic_tensor": self.dielectric_ionic_tensor, + } + if self.fd and self.varcell: + dct |= {"elastic_tensor": self.data["elastic_tensor"]} + + if self.dfpt: + dct["internal_strain_tensor"] = self.internal_strain_tensor + + if self.dfpt and self.lepsilon: + dct |= { + "piezo_ionic_tensor": self.piezo_ionic_tensor, + "dielectric_ionic_tensor": self.dielectric_ionic_tensor, + } - return avg_core_pots + if self.lcalcpol: + dct |= {"p_elec": self.p_elec, "p_ion": self.p_ion} + if self.spin and not self.noncollinear: + dct |= {"p_sp1": self.p_sp1, "p_sp2": self.p_sp2} + dct["zval_dict"] = self.zval_dict + + if self.nmr_cs: + dct.update( + nmr_cs={ + "valence and core": self.data["chemical_shielding"]["valence_and_core"], + "valence_only": self.data["chemical_shielding"]["valence_only"], + "g0": self.data["cs_g0_contribution"], + "core": self.data["cs_core_contribution"], + "raw": self.data["unsym_cs_tensor"], + } + ) + + if self.nmr_efg: + dct.update( + nmr_efg={ + "raw": self.data["unsym_efg_tensor"], + "parameters": self.data["efg"], + } + ) + + if self.has_onsite_density_matrices: + # Cast Spin to str for consistency with electronic_structure + # TODO: improve handling of Enum (de)serialization in monty + onsite_density_matrices = [{str(k): v for k, v in d.items()} for d in self.data["onsite_density_matrices"]] + dct["onsite_density_matrices"] = onsite_density_matrices + + return dct def read_fermi_contact_shift(self) -> None: """Read Fermi contact (isotropic) hyperfine coupling parameter. Output example: - Fermi contact (isotropic) hyperfine coupling parameter (MHz) - ------------------------------------------------------------- - ion A_pw A_1PS A_1AE A_1c A_tot - ------------------------------------------------------------- - 1 -0.002 -0.002 -0.051 0.000 -0.052 - 2 -0.002 -0.002 -0.051 0.000 -0.052 - 3 0.056 0.056 0.321 -0.048 0.321 - ------------------------------------------------------------- - which corresponds to: - [[-0.002, -0.002, -0.051, 0.0, -0.052], - [-0.002, -0.002, -0.051, 0.0, -0.052], - [0.056, 0.056, 0.321, -0.048, 0.321]] from 'fch' data. - - Renders accessible from self.data: - fermi_contact_shift (dict[Literal["fch", "dh", "th"], list[list[float]]]): - Fermi contact (isotropic) hyperfine coupling parameter. + Fermi contact (isotropic) hyperfine coupling parameter (MHz) + ------------------------------------------------------------- + ion A_pw A_1PS A_1AE A_1c A_tot + ------------------------------------------------------------- + 1 -0.002 -0.002 -0.051 0.000 -0.052 + 2 -0.002 -0.002 -0.051 0.000 -0.052 + 3 0.056 0.056 0.321 -0.048 0.321 + ------------------------------------------------------------- + which corresponds to: + [[-0.002, -0.002, -0.051, 0.0, -0.052], + [-0.002, -0.002, -0.051, 0.0, -0.052], + [0.056, 0.056, 0.321, -0.048, 0.321]] from 'fch' data. """ # Fermi contact (isotropic) hyperfine coupling parameter (MHz) header_pattern1 = ( @@ -3645,7 +3605,7 @@ def read_fermi_contact_shift(self) -> None: ) row_pattern1 = r"(?:\d+)\s+" + r"\s+".join([r"([-]?\d+\.\d+)"] * 5) footer_pattern = r"\-+" - fch_table: list[list[float]] = self.read_table_pattern( + fch_table = self.read_table_pattern( header_pattern1, row_pattern1, footer_pattern, @@ -3661,7 +3621,7 @@ def read_fermi_contact_shift(self) -> None: r"\s*\-+" ) row_pattern2 = r"(?:\d+)\s+" + r"\s+".join([r"([-]?\d+\.\d+)"] * 6) - dh_table: list[list[float]] = self.read_table_pattern( + dh_table = self.read_table_pattern( header_pattern2, row_pattern2, footer_pattern, @@ -3678,7 +3638,7 @@ def read_fermi_contact_shift(self) -> None: r"\s*\-+" ) row_pattern3 = r"(?:\d+)\s+" + r"\s+".join([r"([-]?\d+\.\d+)"] * 4) - th_table: list[list[float]] = self.read_table_pattern( + th_table = self.read_table_pattern( header_pattern3, row_pattern3, footer_pattern, @@ -3686,11 +3646,7 @@ def read_fermi_contact_shift(self) -> None: last_one_only=True, ) - fc_shift_table: dict[Literal["fch", "dh", "th"], list[list[float]]] = { - "fch": fch_table, - "dh": dh_table, - "th": th_table, - } + fc_shift_table = {"fch": fch_table, "dh": dh_table, "th": th_table} self.data["fermi_contact_shift"] = fc_shift_table @@ -3714,8 +3670,8 @@ def parse_file(filename: PathLike) -> tuple[Poscar, dict, dict]: """ poscar_read = False poscar_string: list[str] = [] - dataset: NDArray = np.zeros((1, 1, 1)) - all_dataset: list[NDArray] = [] + dataset: np.ndarray = np.zeros((1, 1, 1)) + all_dataset: list[np.ndarray] = [] # for holding any strings in input that are not Poscar # or VolumetricData (typically augmentation charges) all_dataset_aug: dict[int, list[str]] = {} @@ -3745,7 +3701,7 @@ def parse_file(filename: PathLike) -> tuple[Poscar, dict, dict]: elif not poscar_read: if line != "" or len(poscar_string) == 0: - poscar_string.append(line) # type:ignore[arg-type] + poscar_string.append(line) elif line == "": poscar = Poscar.from_str("\n".join(poscar_string)) poscar_read = True @@ -3753,7 +3709,7 @@ def parse_file(filename: PathLike) -> tuple[Poscar, dict, dict]: elif not dim: dim = [int(i) for i in line.split()] ngrid_pts = dim[0] * dim[1] * dim[2] - dimline = line # type:ignore[assignment] + dimline = line read_dataset = True dataset = np.zeros(dim) @@ -3770,7 +3726,7 @@ def parse_file(filename: PathLike) -> tuple[Poscar, dict, dict]: key = len(all_dataset) - 1 if key not in all_dataset_aug: all_dataset_aug[key] = [] - all_dataset_aug[key].append(original_line) # type:ignore[arg-type] + all_dataset_aug[key].append(original_line) if len(all_dataset) == 4: data = { @@ -3843,21 +3799,21 @@ def format_fortran_float(flt: float) -> str: def write_spin(data_type: str) -> None: lines = [] count = 0 - file.write(f" {dim[0]} {dim[1]} {dim[2]}\n") # type:ignore[arg-type] + file.write(f" {dim[0]} {dim[1]} {dim[2]}\n") for k, j, i in itertools.product(list(range(dim[2])), list(range(dim[1])), list(range(dim[0]))): lines.append(format_fortran_float(self.data[data_type][i, j, k])) count += 1 if count % 5 == 0: - file.write(" " + "".join(lines) + "\n") # type:ignore[arg-type] + file.write(" " + "".join(lines) + "\n") lines = [] else: lines.append(" ") if count % 5 != 0: - file.write(" " + "".join(lines) + " \n") # type:ignore[arg-type] + file.write(" " + "".join(lines) + " \n") - data: list | NDArray = self.data_aug.get(data_type, []) if self.data_aug is not None else [] + data = self.data_aug.get(data_type, []) if isinstance(data, Iterable): - file.write("".join(data)) # type:ignore[arg-type] + file.write("".join(data)) with zopen(file_name, mode="wt", encoding="utf-8") as file: poscar = Poscar(self.structure) @@ -3877,7 +3833,7 @@ def write_spin(data_type: str) -> None: dim, b, c = site.frac_coords lines += f"{dim:10.6f}{b:10.6f}{c:10.6f}\n" lines += " \n" - file.write(lines) # type:ignore[arg-type] + file.write(lines) dim = self.dim write_spin("total") @@ -3893,11 +3849,11 @@ def write_spin(data_type: str) -> None: class Locpot(VolumetricData): """LOCPOT file reader.""" - def __init__(self, poscar: Poscar, data: dict[str, NDArray], **kwargs) -> None: + def __init__(self, poscar: Poscar, data: np.ndarray, **kwargs) -> None: """ Args: poscar (Poscar): Poscar object containing structure. - data (NDArray): Actual data. + data (np.ndarray): Actual data. """ super().__init__(poscar.structure, data, **kwargs) self.name = poscar.comment @@ -3923,7 +3879,7 @@ def __init__( self, poscar: Poscar | Structure, data: dict[str, NDArray], - data_aug: dict[str, NDArray] | None = None, + data_aug: NDArray | None = None, ) -> None: """ Args: @@ -3944,7 +3900,7 @@ def __init__( raise TypeError("Unsupported POSCAR type.") super().__init__(struct, data, data_aug=data_aug) - self._distance_matrix = {} + self._distance_matrix: dict = {} @classmethod def from_file(cls, filename: str) -> Self: @@ -3957,7 +3913,7 @@ def from_file(cls, filename: str) -> Self: Chgcar """ poscar, data, data_aug = VolumetricData.parse_file(filename) - return cls(poscar, data, data_aug=data_aug) # type:ignore[arg-type] + return cls(poscar, data, data_aug=data_aug) @property def net_magnetization(self) -> float | None: @@ -4032,23 +3988,23 @@ class Procar(MSONable): Attributes: data (dict): The PROCAR data of the form below. It should VASP uses 1-based indexing, but all indices are converted to 0-based here. - {spin: np.array accessed with (k-point index, band index, ion index, orbital index)} - weights (NDArray): The weights associated with each k-point as an array of length nkpoints. + { spin: np.array accessed with (k-point index, band index, ion index, orbital index) } + weights (np.array): The weights associated with each k-point as an np.array of length nkpoints. phase_factors (dict): Phase factors, where present (e.g. LORBIT = 12). A dict of the form: - {spin: complex np.array accessed with (k-point index, band index, ion index, orbital index)} + { spin: complex np.array accessed with (k-point index, band index, ion index, orbital index) } nbands (int): Number of bands. nkpoints (int): Number of k-points. nions (int): Number of ions. nspins (int): Number of spins. is_soc (bool): Whether the PROCAR contains spin-orbit coupling (LSORBIT = True) data. - kpoints (NDArray): The k-points as an np.array of shape (nkpoints, 3). + kpoints (np.array): The k-points as an np.array of shape (nkpoints, 3). occupancies (dict): The occupancies of the bands as a dict of the form: - {spin: np.array accessed with (k-point index, band index)} + { spin: np.array accessed with (k-point index, band index) } eigenvalues (dict): The eigenvalues of the bands as a dict of the form: - {spin: np.array accessed with (k-point index, band index)} + { spin: np.array accessed with (k-point index, band index) } xyz_data (dict): The PROCAR projections data along the x,y and z magnetisation projection directions, with is_soc = True (see VASP wiki for more info). - {'x'/'y'/'z': np.array accessed with (k-point index, band index, ion index, orbital index)} + { 'x'/'y'/'z': np.array accessed with (k-point index, band index, ion index, orbital index) } """ def __init__(self, filename: PathLike | list[PathLike]): @@ -4165,7 +4121,7 @@ def read(self, filenames: list[PathLike]): else: self.xyz_data = None - def _parse_kpoint_line(self, line: str) -> tuple[float, float, float]: + def _parse_kpoint_line(self, line): """ Parse k-point vector from a PROCAR line. @@ -4173,13 +4129,13 @@ def _parse_kpoint_line(self, line: str) -> tuple[float, float, float]: '0.00000000-0.50000000-0.50000000' when there are negative signs, so need to be able to recognise and handle this. """ - fields: list[str] = line.split() - kpoint_fields: list[str] = fields[3 : fields.index("weight")] - _kpoint_fields: list[list[str]] = [" -".join(field.split("-")).split() for field in kpoint_fields] - kpoint_fields = [val for sublist in _kpoint_fields for val in sublist] # flattened + fields = line.split() + kpoint_fields = fields[3 : fields.index("weight")] + kpoint_fields = [" -".join(field.split("-")).split() for field in kpoint_fields] + kpoint_fields = [val for sublist in kpoint_fields for val in sublist] # flatten - # tuple to make it hashable, rounded to 5 decimal places to ensure proper kpoint matching - return cast("tuple[float, float, float]", tuple(round(float(val), 5) for val in kpoint_fields)) + return tuple(round(float(val), 5) for val in kpoint_fields) # tuple to make it hashable, + # rounded to 5 decimal places to ensure proper kpoint matching def _read(self, filename: PathLike, parsed_kpoints: set[tuple[Kpoint]] | None = None): """Main function for reading in the PROCAR projections data. @@ -4191,7 +4147,7 @@ def _read(self, filename: PathLike, parsed_kpoints: set[tuple[Kpoint]] | None = """ if parsed_kpoints is None: parsed_kpoints = set() - line: str + with zopen(filename, mode="rt", encoding="utf-8") as file: preamble_expr = re.compile(r"# of k-points:\s*(\d+)\s+# of bands:\s*(\d+)\s+# of ions:\s*(\d+)") kpoint_expr = re.compile(r"^k-point\s+(\d+).*weight = ([0-9\.]+)") @@ -4207,13 +4163,13 @@ def _read(self, filename: PathLike, parsed_kpoints: set[tuple[Kpoint]] | None = kpoints: list[tuple[float, float, float]] = [] n_bands = None n_ions = None - weights: NDArray[np.float64] | None = None + weights: np.ndarray[float] | None = None headers = None - data: dict[Spin, NDArray] = {} - eigenvalues: dict[Spin, NDArray] | None = None - occupancies: dict[Spin, NDArray] | None = None - phase_factors: dict[Spin, NDArray] | None = None - xyz_data: dict[str, NDArray] | None = None # 'x'/'y'/'z' as keys for SOC projections dict + data: dict[Spin, np.ndarray] = {} + eigenvalues: dict[Spin, np.ndarray] | None = None + occupancies: dict[Spin, np.ndarray] | None = None + phase_factors: dict[Spin, np.ndarray] | None = None + xyz_data: dict[str, np.ndarray] | None = None # 'x'/'y'/'z' as keys for SOC projections dict # keep track of parsed kpoints, to avoid redundant/duplicate parsing with multiple PROCARs: this_procar_parsed_kpoints = ( set() @@ -4223,7 +4179,7 @@ def _read(self, filename: PathLike, parsed_kpoints: set[tuple[Kpoint]] | None = # total and x,y,z) for each band, while non-SOC have only 1 list of projections: tot_count = 0 band_count = 0 - for line in file: # type:ignore[assignment] + for line in file: if total_expr.match(line): tot_count += 1 elif band_expr.match(line): @@ -4248,8 +4204,8 @@ def _read(self, filename: PathLike, parsed_kpoints: set[tuple[Kpoint]] | None = skipping_kpoint = False # true when skipping projections for a previously-parsed kpoint ion_line_count = 0 # printed twice when phase factors present proj_data_parsed_for_band = 0 # 0 for non-SOC, 1-4 for SOC/phase factors - for line in file: # type:ignore[assignment] - line = line.strip() # type:ignore[assignment] + for line in file: + line = line.strip() if ion_expr.match(line): ion_line_count += 1 @@ -4294,16 +4250,16 @@ def _read(self, filename: PathLike, parsed_kpoints: set[tuple[Kpoint]] | None = headers.pop(0) headers.pop(-1) - data = defaultdict(lambda: np.zeros((n_kpoints, n_bands, n_ions, len(headers)))) # type:ignore[arg-type, type-var] + data = defaultdict(lambda: np.zeros((n_kpoints, n_bands, n_ions, len(headers)))) phase_factors = defaultdict( - lambda: np.full( # type:ignore[type-var] - (n_kpoints, n_bands, n_ions, len(headers)), # type:ignore[arg-type] + lambda: np.full( + (n_kpoints, n_bands, n_ions, len(headers)), np.nan, dtype=np.complex128, ) ) if self.is_soc: # dict keys are now "x", "y", "z" rather than Spin.up/down - xyz_data = defaultdict(lambda: np.zeros((n_kpoints, n_bands, n_ions, len(headers)))) # type:ignore[arg-type, type-var] + xyz_data = defaultdict(lambda: np.zeros((n_kpoints, n_bands, n_ions, len(headers)))) elif expr.match(line): tokens = line.split() @@ -4357,7 +4313,7 @@ def _read(self, filename: PathLike, parsed_kpoints: set[tuple[Kpoint]] | None = self.nions = n_ions # attributes that should be consistent between multiple files are set here if self.orbitals is not None and self.orbitals != headers: # multiple PROCARs but orbitals mismatch! raise ValueError(f"Mismatch in orbital headers in supplied PROCARs: {headers} vs {self.orbitals}!") - self.orbitals = headers # type:ignore[assignment] + self.orbitals = headers if self.nspins is not None and self.nspins != len(data): # parsing multiple PROCARs but nspins mismatch! raise ValueError("Mismatch in number of spin channels in supplied PROCARs!") self.nspins = len(data) @@ -4430,7 +4386,7 @@ def get_occupation(self, atom_index: int, orbital: str) -> dict: of that orbital is returned. Returns: - dict: Sum occupation of orbital of atom. + Sum occupation of orbital of atom. """ if self.orbitals is None: raise ValueError("orbitals is None") @@ -4447,17 +4403,18 @@ def get_occupation(self, atom_index: int, orbital: str) -> dict: class Oszicar: """OSZICAR parser for VASP. - In general, while OSZICAR is useful for a quick look at the output from a VASP run, - we recommend using the Vasprun parser instead, which gives far richer information. + In general, while OSZICAR is useful for a quick look at the + output from a VASP run, we recommend using the Vasprun parser + instead, which gives far richer information. Attributes: - electronic_steps (list[list[dict]]): All electronic steps. e.g. + electronic_steps (list): All electronic steps as a list of list of dict. e.g. [[{"rms": 160.0, "E": 4507.24605593, "dE": 4507.2, "N": 1, "deps": -17777.0, "ncg": 16576}, ...], [....] where electronic_steps[index] refers the list of electronic steps in one ionic_step, electronic_steps[index][subindex] refers to a particular electronic step at subindex in ionic step at index. The dict of properties depends on the type of VASP run, but in general, "E", "dE" and "rms" should be present in almost all runs. - ionic_steps (list[dict[str, float]]): All ionic_steps, e.g. + ionic_steps (list): All ionic_steps as a list of dict, e.g. [{"dE": -526.36, "E0": -526.36024, "mag": 0.0, "F": -526.36024}, ...] This is the typical output from VASP at the end of each ionic step. The stored dict might be different depending on the type of VASP run. @@ -4483,8 +4440,7 @@ def smart_convert(header: str, num: float | str) -> float | str: header: list = [] with zopen(filename, mode="rt", encoding="utf-8") as file: - line: str - for line in file: # type:ignore[assignment] + for line in file: if match := electronic_pattern.match(line.strip()): tokens = match[1].split() data = {header[idx]: smart_convert(header[idx], tokens[idx]) for idx in range(len(tokens))} @@ -4592,8 +4548,8 @@ class Xdatcar: """XDATCAR parser. Only tested with VASP 5.x files. Attributes: - structures (list[Structure]): Structures parsed from XDATCAR. - comment (str): Optional comment. + structures (list): List of structures parsed from XDATCAR. + comment (str): Optional comment string. Authors: Ram Balachandran """ @@ -4610,8 +4566,8 @@ def __init__( Args: filename (PathLike): The XDATCAR file. - ionicstep_start (int): Starting index of ionic step. - ionicstep_end (int): Ending index of ionic step. + ionicstep_start (int): Starting number of ionic step. + ionicstep_end (int): Ending number of ionic step. comment (str): Optional comment attached to this set of structures. """ preamble = None @@ -4683,27 +4639,6 @@ def __init__( def __str__(self) -> str: return self.get_str() - def __len__(self) -> int: - return len(self.structures) - - def __iter__(self) -> Iterator[Structure]: - """Iterator of Xdatcar, yielding a pymatgen Structure.""" - for idx in range(len(self)): - yield self.structures[idx] - - def __getitem__(self, frames: int | slice | list[int] | np.ndarray) -> Structure | list[Structure]: - """Get a subset of the Xdatcar. - - Args: - frames (int, slice, list of int, or numpy Array): Indices of the Xdatcar to return. - - Returns: - Structure, if frames is an int; otherwise, a list of Structure - """ - if isinstance(frames, int | slice): - return self.structures[frames] - return [self.structures[idx] for idx in frames] - @property def site_symbols(self) -> list[str]: """Sequence of symbols associated with the Xdatcar. @@ -4764,19 +4699,19 @@ def concatenate( else: preamble.append(line) elif line == "" or "Direct configuration=" in line: - poscar = Poscar.from_str("\n".join([*preamble, "Direct", *coords_str])) # type:ignore[list-item] - if (ionicstep_end is None and ionicstep_cnt >= ionicstep_start) or ( - ionicstep_end is not None and ionicstep_start <= ionicstep_cnt < ionicstep_end - ): + poscar = Poscar.from_str("\n".join([*preamble, "Direct", *coords_str])) + if ( + ionicstep_end is None and ionicstep_cnt >= ionicstep_start + ) or ionicstep_start <= ionicstep_cnt < ionicstep_end: structures.append(poscar.structure) ionicstep_cnt += 1 coords_str = [] else: - coords_str.append(line) # type:ignore[arg-type] + coords_str.append(line) if preamble is None: raise ValueError("preamble is None") - poscar = Poscar.from_str("\n".join([*preamble, "Direct", *coords_str])) # type:ignore[list-item] + poscar = Poscar.from_str("\n".join([*preamble, "Direct", *coords_str])) if ( (ionicstep_end is None and ionicstep_cnt >= ionicstep_start) @@ -4791,15 +4726,12 @@ def get_str( ionicstep_end: int | None = None, significant_figures: int = 8, ) -> str: - """Get Xdatcar as a string. + """Write Xdatcar to a string. Args: - ionicstep_start (int): Starting index of ionic step. - ionicstep_end (int): Ending index of ionic step. + ionicstep_start (int): Starting number of ionic step. + ionicstep_end (int): Ending number of ionic step. significant_figures (int): Number of significant digits. - - Returns: - str: Xdatcar as a string. """ if ionicstep_start < 1: raise ValueError("Start ionic step cannot be less than 1") @@ -4838,7 +4770,7 @@ def write_file(self, filename: PathLike, **kwargs) -> None: method and are passed through directly. """ with zopen(filename, mode="wt", encoding="utf-8") as file: - file.write(self.get_str(**kwargs)) # type:ignore[arg-type] + file.write(self.get_str(**kwargs)) class Dynmat: @@ -4849,7 +4781,7 @@ class Dynmat: [atom ][disp ]['dispvec'] = displacement vector (part of first line in dynmat block, e.g. "0.01 0 0") [atom ][disp ]['dynmat'] = - list of dynmat lines for this atom and this displacement + list of dynmat lines for this atom and this displacement Authors: Patrick Huck """ @@ -4860,7 +4792,7 @@ def __init__(self, filename: PathLike) -> None: filename: Name of file containing DYNMAT. """ with zopen(filename, mode="rt", encoding="utf-8") as file: - lines: list[str] = list(clean_lines(file.readlines())) # type:ignore[arg-type] + lines = list(clean_lines(file.readlines())) self._nspecs, self._natoms, self._ndisps = map(int, lines[0].split()) self._masses = map(float, lines[1].split()) self.data: dict[int, dict] = defaultdict(dict) @@ -4879,16 +4811,13 @@ def __init__(self, filename: PathLike) -> None: self.data[atom][disp]["dynmat"] = [] # type: ignore[index] self.data[atom][disp]["dynmat"].append(v) # type: ignore[index] - def get_phonon_frequencies(self) -> list[float]: + def get_phonon_frequencies(self) -> list: """Calculate phonon frequencies. WARNING: This method is most likely incorrect or suboptimal, - hence for demonstration purposes only. - - Returns: - list[float]: phonon frequencies. + hence for demonstration purposes only. """ - frequencies: list[float] = [] + frequencies = [] for k, v0 in self.data.items(): for v1 in v0.values(): vec = map(abs, v1["dynmat"][k - 1]) @@ -4949,7 +4878,6 @@ def get_adjusted_fermi_level( """ # Make a working copy of band_structure bs_working = BandStructureSymmLine.from_dict(band_structure.as_dict()) - if bs_working.is_metal(): energy = efermi while energy < cbm: @@ -4986,18 +4914,19 @@ class Wavecar: (https://doi.org/10.1103/PhysRevMaterials.1.065001). Attributes: - vasp_type ("std" | "gam" | "ncl"): The VASP type WAVECAR was generated with. + vasp_type (str): String that determines VASP type the WAVECAR was generated with. + One of 'std', 'gam', 'ncl'. nk (int): Number of k-points from the WAVECAR. nb (int): Number of bands per k-point. encut (float): Energy cutoff (used to define G_{cut}). efermi (float): Fermi energy. - a (NDArray): Primitive lattice vectors of the cell (e.g. a_1 = self.a[0, :]). - b (NDArray): Reciprocal lattice vectors of the cell (e.g. b_1 = self.b[0, :]). + a (np.array): Primitive lattice vectors of the cell (e.g. a_1 = self.a[0, :]). + b (np.array): Reciprocal lattice vectors of the cell (e.g. b_1 = self.b[0, :]). vol (float): The volume of the unit cell in real space. - kpoints (NDArray): A list of k-points read from the WAVECAR file. - band_energy (list): A list of band eigenenergies (and corresponding occupancies) for each kpoint, + kpoints (np.array): The list of k-points read from the WAVECAR file. + band_energy (list): The list of band eigenenergies (and corresponding occupancies) for each kpoint, where the first index corresponds to the index of the k-point (e.g. self.band_energy[kp]). - Gpoints (list): A list of generated G-points for each k-point (a double list), which + Gpoints (list): The list of generated G-points for each k-point (a double list), which are used with the coefficients for each k-point and band to recreate the wavefunction (e.g. self.Gpoints[kp] is the list of G-points for k-point kp). The G-points depend on the k-point and reciprocal lattice @@ -5005,7 +4934,7 @@ class Wavecar: G-point is represented by integer multipliers (e.g. assuming Gpoints[kp][n] == [n_1, n_2, n_3], then G_n = n_1*b_1 + n_2*b_2 + n_3*b_3) - coeffs (list): A list of coefficients for each k-point and band for reconstructing the wavefunction. + coeffs (list): The list of coefficients for each k-point and band for reconstructing the wavefunction. For non-spin-polarized, the first index corresponds to the kpoint and the second corresponds to the band (e.g. self.coeffs[kp][b] corresponds to k-point kp and band b). For spin-polarized calculations, the first index is for the spin. If the calculation was non-collinear, then self.coeffs[kp][b] will have @@ -5246,7 +5175,7 @@ def _generate_nbmax(self) -> None: def _generate_G_points( self, - kpoint: NDArray, + kpoint: np.ndarray, gamma: bool = False, ) -> tuple[list, list, list]: """Helper method to generate G-points based on nbmax. @@ -5257,12 +5186,12 @@ def _generate_G_points( initialization. Args: - kpoint (NDArray): The current k-point value. + kpoint (np.array): the array containing the current k-point value gamma (bool): determines if G points for gamma-point only executable - should be generated. + should be generated Returns: - tuple[list, list, list]: Valid G-points + A tuple containing valid G-points """ kmax = self._nbmax[0] + 1 if gamma else 2 * self._nbmax[0] + 1 @@ -5294,7 +5223,7 @@ def evaluate_wavefunc( self, kpoint: int, band: int, - r: NDArray, + r: np.ndarray, spin: int = 0, spinor: int = 0, ) -> np.complex64: @@ -5316,7 +5245,7 @@ def evaluate_wavefunc( Args: kpoint (int): the index of the kpoint where the wavefunction will be evaluated. band (int): the index of the band where the wavefunction will be evaluated. - r (NDArray): the position where the wavefunction will be evaluated. + r (np.array): the position where the wavefunction will be evaluated. spin (int): spin index for the desired wavefunction (only for ISPIN = 2, default = 0). spinor (int): component of the spinor that is evaluated (only used @@ -5346,7 +5275,7 @@ def fft_mesh( spin: int = 0, spinor: int = 0, shift: bool = True, - ) -> NDArray: + ) -> np.ndarray: """Place the coefficients of a wavefunction onto an fft mesh. Once the mesh has been obtained, a discrete fourier transform can be @@ -5433,7 +5362,7 @@ def get_parchg( Returns: A Chgcar object. """ - if phase and not np.allclose(self.kpoints[kpoint], 0.0): + if phase and not np.all(self.kpoints[kpoint] == 0.0): warnings.warn( "phase is True should only be used for the Gamma kpoint! I hope you know what you're doing!", stacklevel=2, @@ -5532,7 +5461,7 @@ class Eigenval: nbands (int): Number of bands. kpoints (list): List of kpoints. kpoints_weights (list): Weights of each kpoint in the BZ, should sum to 1. - eigenvalues (dict): Eigenvalues as a dict of {(spin): NDArray(shape=(nkpt, nbands, 2))}. + eigenvalues (dict): Eigenvalues as a dict of {(spin): np.ndarray(shape=(nkpt, nbands, 2))}. This representation is based on actual ordering in VASP and is meant as an intermediate representation to be converted into proper objects. The kpoint index is 0-based (unlike the 1-based indexing in VASP). """ @@ -5688,8 +5617,8 @@ class Waveder(MSONable): Author: Miguel Dias Costa, Kamal Choudhary, Jimmy-Xuan Shen """ - cder_real: NDArray - cder_imag: NDArray + cder_real: np.ndarray + cder_imag: np.ndarray @classmethod def from_formatted(cls, filename: PathLike) -> Self: @@ -5766,7 +5695,7 @@ def read_data(dtype): return cls(cder_data.real, cder_data.imag) @property - def cder(self) -> NDArray: + def cder(self) -> np.ndarray: """The complex derivative of the orbitals with respect to k.""" if self.cder_real.shape[0] != self.cder_real.shape[1]: # pragma: no cover warnings.warn( @@ -5845,11 +5774,11 @@ class WSWQ(MSONable): nspin: int nkpoints: int nbands: int - me_real: NDArray - me_imag: NDArray + me_real: np.ndarray + me_imag: np.ndarray @property - def data(self) -> NDArray: + def data(self) -> np.ndarray: """Complex overlap matrix.""" return self.me_real + 1j * self.me_imag @@ -5910,572 +5839,3 @@ def from_file(cls, filename: str) -> Self: class UnconvergedVASPWarning(Warning): """Warning for unconverged VASP run.""" - - -@requires(h5py is not None, "h5py must be installed to read vaspout.h5") -class Vaspout(Vasprun): - """ - Class to read vaspout.h5 files. - - This class inherits from Vasprun, as the vaspout.h5 file is intended - to be a forward-looking replacement for vasprun.xml. - - Thus to later accommodate a smooth transition to vaspout.h5, this class - uses the same structure as Vasprun but overrides many of its methods. - - Parameters - ----------- - filename : str or Path - The name of the vaspout.h5 file to parse, can be compressed. - occu_tol: float = 1e-8 - Sets the minimum tol for the determination of the - vbm and cbm. Usually the default of 1e-8 works well enough, - but there may be pathological cases. - parse_dos : bool = True - Whether to parse the dos. Defaults to True. Set - to False to shave off significant time from the parsing if you - are not interested in getting those data. - parse_eigen : bool = True - Whether to parse the eigenvalues. Defaults to - True. Set to False to shave off significant time from the - parsing if you are not interested in getting those data. - parse_projected_eigen : bool = False - Whether to parse the projected - eigenvalues and magnetization. Defaults to False. Set to True to obtain - projected eigenvalues and magnetization. **Note that this can take an - extreme amount of time and memory.** So use this wisely. - separate_spins : bool - Whether the band gap, CBM, and VBM should be - reported for each individual spin channel. Defaults to False, - which computes the eigenvalue band properties independent of - the spin orientation. If True, the calculation must be spin-polarized. - store_potcar : bool - Whether to store the full POTCAR data. - """ - - def __init__( - self, - filename: str | Path, - occu_tol: float = 1e-8, - parse_dos: bool = True, - parse_eigen: bool = True, - parse_projected_eigen: bool = False, - separate_spins: bool = False, - store_potcar: bool = True, - ) -> None: - self.filename = str(filename) - self.occu_tol = occu_tol - self.separate_spins = separate_spins - self.store_potcar = store_potcar - - self._parse(parse_dos, parse_eigen, parse_projected_eigen) - - @classmethod - def _parse_hdf5_value(cls, val: Any) -> Any: - """ - Parse HDF5 values recursively, turning it into a dict-like entry. - - This could be a staticmethod, but a recursive staticmethod seems to only - work in python >= 3.10. Using a classmethod to work with 3.9. - - Args: - val (Any), input value - Returns: - Any, output value. Recursion is performed until a bytes-like object is input. - """ - if hasattr(val, "items"): - val = {k: cls._parse_hdf5_value(v) for k, v in val.items()} - else: - val = np.array(val).tolist() - if isinstance(val, bytes): - val = val.decode() - elif isinstance(val, list): - val = [cls._parse_hdf5_value(x) for x in val] - return val - - def _parse(self, parse_dos: bool, parse_eigen: bool, parse_projected_eigen: bool) -> None: # type: ignore[override] - """ - Parse data contained in vaspout.h5. - - Args: - parse_dos (bool) - Whether to parse the DOS - parse_eigen (bool) - Whether to parse the bandstructure / electronic eigenvalues - parse_projected_eigen (bool) - Whether to parse the projected bandstructure. - TODO: this information is not currently included in vaspout.h5, add later? - """ - with zopen(self.filename, "rb") as vout_file, h5py.File(vout_file, "r") as h5_file: - # Loading only certain blocks into memory at a given time to lessen memory usage - vasp_version = self._parse_hdf5_value(h5_file["version"]) - self._parse_params(self._parse_hdf5_value(h5_file["input"])) - self._get_ionic_steps(self._parse_hdf5_value(h5_file["intermediate"]["ion_dynamics"])) - - self.bandgap_props: dict[str, dict[str, BandgapProps]] | None = None - if h5_file["intermediate"].get("band"): - self.bandgap_props = self._parse_bandgap_props(self._parse_hdf5_value(h5_file["intermediate"]["band"])) - - # ----- - # TODO: determine if these following fields are stored in vaspout.h5 - self.md_data = [] - # ----- - - outputs = self._parse_hdf5_value(h5_file["results"]) - - self._parse_results(outputs) - - if parse_dos: - try: - self._parse_dos( - electron_dos=outputs["electron_dos"], - projectors=outputs.get("projectors", {}).get("lchar", None), - ) - self.dos_has_errors = False - - if outputs.get("electron_dos_kpoints_opt"): - self._parse_dos( - electron_dos=outputs["electron_dos"], - projectors=outputs.get("projectors_kpoints_opt", {}).get("lchar", None), - kpoints_opt=True, - ) - - except Exception: - self.dos_has_errors = True - - if parse_eigen: - self.eigenvalues = self._parse_eigen(outputs["electron_eigenvalues"]) - if (eigv := outputs.get("electron_eigenvalues_kpoints_opt")) and self.kpoints_opt_props: - self.kpoints_opt_props.eigenvalues = self._parse_eigen( - eigv, - ispin=outputs["electron_eigenvalues"]["ispin"], - nb_tot=outputs["electron_eigenvalues"]["nb_tot"], - ) - - self.projected_eigenvalues = None - self.projected_magnetisation = None - if parse_projected_eigen: - # TODO: are these contained in vaspout.h5? - self.projected_eigenvalues = None - self.projected_magnetisation = None - - self.vasp_version = ".".join(f"{vasp_version.get(tag, '')}" for tag in ("major", "minor", "patch")) - - # TODO: are the other generator tags, like computer platform, stored in vaspout.h5? - self.generator = {"version": self.vasp_version} # type:ignore[assignment] - - @staticmethod - def _parse_structure(positions: dict) -> Structure: # type: ignore[override] - """ - Parse the structure from vaspout format. - - Args: - positions (dict), dict representation of POSCAR - Returns: - pymatgen Structure - """ - species = [] - for ispecie, specie in enumerate(positions["ion_types"]): - species += [specie for _ in range(positions["number_ion_types"][ispecie])] - - # TODO : figure out how site_properties are stored in vaspout - site_properties: dict[str, list] = {} - if positions["selective_dynamics"] == 1: - site_properties["selective_dynamics"] = [] - - return Structure( - lattice=Lattice(positions["scale"] * np.array(positions["lattice_vectors"])), - species=species, - coords=positions["position_ions"], - coords_are_cartesian=(positions["direct_coordinates"] == 1), - ) - - @staticmethod - def _parse_kpoints(kpoints: dict) -> tuple[Kpoints, list | None, list | None]: # type: ignore[override] - _kpoints_style_from_mode = { - KpointsSupportedModes.Reciprocal: {"mode": "e", "coordinate_space": "R"}, - KpointsSupportedModes.Automatic: {"mode": "a"}, - KpointsSupportedModes.Gamma: {"mode": "g"}, - KpointsSupportedModes.Line_mode: {"mode": "l"}, - KpointsSupportedModes.Monkhorst: {"mode": "m"}, - } - kpts = {"comment": kpoints.get("system", "Unknown")} - - for kpoints_style, props in _kpoints_style_from_mode.items(): - if all(kpoints.get(k) == v for k, v in props.items()): - kpts["style"] = kpoints_style - break - - if not kpts.get("style"): - raise ValueError("Could not identify KPOINTS style.") - - if coord_type := kpoints.get("coordinate_space"): - kpts["coord_type"] = "Reciprocal" if coord_type == "R" else "Cartesian" - - actual_kpoints = None - actual_kpoint_weights = None - if kpoints.get("coordinates_kpoints"): - kpts["num_kpts"] = len(kpoints["coordinates_kpoints"]) - kpts["labels"] = [None for _ in range(kpts["num_kpts"])] - for i, idx in enumerate(kpoints.get("positions_labels_kpoints", [])): - kpts["labels"][idx - 1] = kpoints["labels_kpoints"][i] - - kpts["kpts"] = kpoints["coordinates_kpoints"] - actual_kpoints = kpoints["coordinates_kpoints"] - - # NB: no weights for KPOINTS_OPT - actual_kpoint_weights = kpoints.get("weights_kpoints", [1.0 for _ in range(len(actual_kpoints))]) - - elif all(kpoints.get(f"nkp{axis}") for axis in ("x", "y", "z")): - kpts["num_kpts"] = kpoints["number_kpoints"] - kpts["kpts"] = [[kpoints[f"nkp{axis}"] for axis in ("x", "y", "z")]] - - return ( - Kpoints(**kpts), - actual_kpoints, - actual_kpoint_weights, - ) - - @staticmethod - def _parse_atominfo(composition: Composition): - # TODO: this function seems irrelevant but is used in Vasprun, do we need this? - atom_symbols = [] - for element in composition: - atom_symbols += [str(element) for _ in range(int(composition[element]))] - return atom_symbols - - def _parse_params(self, input_data: dict): # type: ignore[override] - self.incar = Incar(input_data["incar"]) - - # TODO: set defaults in parameters to match vasprun? - self.parameters = Incar.from_dict(self.incar.as_dict()) # type: ignore[attr-defined] - - self.kpoints: list[Any] | None = None # type: ignore[assignment] - self.actual_kpoints: list[Any] | None = None # type: ignore[assignment] - self.actual_kpoints_weights: Sequence[float] = None # type: ignore[assignment] - if not self.incar.get("KSPACING"): - ( - self.kpoints, - self.actual_kpoints, - self.actual_kpoints_weights, - ) = self._parse_kpoints(input_data["kpoints"]) # type: ignore[assignment] - - self.kpoints_opt_props: None | KpointOptProps = None - if input_data.get("kpoints_opt"): - self.kpoints_opt_props = KpointOptProps() - ( - self.kpoints_opt_props.kpoints, - self.kpoints_opt_props.actual_kpoints, - self.kpoints_opt_props.actual_kpoints_weights, - ) = self._parse_kpoints(input_data["kpoints_opt"]) - - self.initial_structure = self._parse_structure(input_data["poscar"]) - self.atomic_symbols = self._parse_atominfo(self.initial_structure.composition) - - self.potcar = None - self.potcar_symbols = [] - self.potcar_spec = [] - if input_data["potcar"].get("content"): - # Unmodified vaspout.h5 with full POTCAR - calc_potcar = Potcar.from_str(input_data["potcar"]["content"]) - self.potcar = calc_potcar if self.store_potcar else None - # The `potcar_symbols` attr is extraordinarily confusingly - # named, these are really TITELs # codespell:ignore - self.potcar_symbols = [potcar.TITEL for potcar in calc_potcar] - - # For parity with vasprun.xml, we do not store the POTCAR symbols in - # the vaspout.h5 POTCAR spec. These are derived from the TITEL fields - # and are thus redundant. - self.potcar_spec = [p.spec(extra_spec=[]) for p in calc_potcar] - - elif input_data["potcar"].get("spec"): - # modified vaspout.h5 with only POTCAR spec - import json - - self.potcar_spec = json.loads(input_data["potcar"]["spec"]) - self.potcar_symbols = [spec["titel"] for spec in self.potcar_spec] - - # TODO: do we want POSCAR stored? - self.poscar = Poscar( - structure=self.initial_structure, - comment=input_data["poscar"].get("system"), - selective_dynamics=self.initial_structure.site_properties.get("selective_dynamics"), - velocities=self.initial_structure.site_properties.get("velocities"), - ) - - def _get_ionic_steps(self, ion_dynamics) -> None: - # use same key accession as in vasprun.xml - vasp_key_to_pmg = { - "free energy TOTEN": "e_fr_energy", - "energy without entropy": "e_wo_entrp", - "energy(sigma->0)": "e_0_energy", - } - - # label s, p, d,... contributions to charge and magnetic moment in same way as Outcar - _to_outcar_tag = { - "total charge": "charge", # older style - "magnetization (x)": "magnetization", - "charge": "charge", # newer style - "x": "magnetization", - } - - self.nionic_steps = len(ion_dynamics["energies"]) - self.ionic_steps = [] - - ionic_step_keys = [ - key - for key in ( - "forces", - "stresses", - ) - if ion_dynamics.get(key) - ] - - for istep in range(self.nionic_steps): - step = { - **{ - vasp_key_to_pmg[ion_dynamics["energies_tags"][ivalue]]: value - for ivalue, value in enumerate(ion_dynamics["energies"][istep]) - }, - **{key: ion_dynamics[key][istep] for key in ionic_step_keys}, - "structure": Structure( - lattice=Lattice(ion_dynamics["lattice_vectors"][istep]), - species=self.initial_structure.species, - coords=ion_dynamics["position_ions"][istep], - coords_are_cartesian=False, # TODO check this is always False - ), - # Placeholder - there's currently no info about electronic steps - # in vaspout.h5 - "electronic_steps": [], - } - if chg_dens_props := ion_dynamics.get("magnetism"): - old_style = chg_dens_props.get("component_tags", []) # Appears to be VASP <= 6.4.2 - new_style = chg_dens_props.get("spin_moments", {}) # Appears to be VASP >= 6.4.3 - if old_style: - components = old_style - moments = chg_dens_props["moments"] - orbitals = chg_dens_props["orbital_tags"] - - elif new_style: - components = new_style["components"] - moments = new_style.get("values") - orbitals = new_style["orbitals"] - else: - warnings.warn( - "Unknown format for the on-site charges and magnetic moments in vaspout.h5.", stacklevel=2 - ) - - if old_style or new_style: - for ik, k in enumerate(components): # pyright: ignore[reportPossiblyUnboundVariable] - site_prop = [ - {orb: moments[istep][ik][iion][iorb] for iorb, orb in enumerate(orbitals)} # pyright: ignore[reportPossiblyUnboundVariable] - for iion in range(len(self.poscar.structure)) - ] - for iion in range(len(self.poscar.structure)): - site_prop[iion]["tot"] = sum(site_prop[iion].values()) - step["structure"].add_site_property(_to_outcar_tag.get(k), site_prop) - - self.ionic_steps += [step] - - def _parse_results(self, results: dict) -> None: - self.final_structure = self._parse_structure(results["positions"]) - - def _parse_dos(self, electron_dos: dict, projectors: list | None = None, kpoints_opt: bool = False): # type: ignore[override] - dos = {"efermi": electron_dos["efermi"]} - densities: dict = {} - for dos_type in ( - "dos", - "dosi", - ): - if electron_dos.get(dos_type): - densities[dos_type] = {} - for ispin in range(len(electron_dos[dos_type])): - densities[dos_type][Spin((-1) ** ispin)] = electron_dos[dos_type][ispin] - - dos["tdos"] = Dos(dos["efermi"], electron_dos["energies"], densities["dos"]) - dos["idos"] = Dos(dos["efermi"], electron_dos["energies"], densities["dosi"]) - - dos["pdos"] = [] - - # for whatever reason, the naming of orbitals is different in vaspout.h5 - vasp_to_pmg_orb = { - "x2-y2": "dx2", - "fy3x2": "f_3", - "fxyz": "f_2", - "fyz2": "f_1", - "fz3": "f0", - "fxz2": "f1", - "fzx2": "f2", - "fx3": "f3", - } - - if pdos := electron_dos.get("dospar"): - projectors = projectors or [] - projectors = [char.strip() for char in projectors] - orbtyp = Orbital if any("x" in char for char in projectors) else OrbitalType - for site_pdos in pdos: - site_res_pdos: dict = defaultdict(dict) - for ispin in range(len(site_pdos)): - for ilm in range(len(site_pdos[ispin])): - orb_str = projectors[ilm] - orb_idx = orbtyp.__members__[vasp_to_pmg_orb.get(orb_str, orb_str)] - site_res_pdos[orb_idx][Spin((-1) ** ispin)] = np.array(site_pdos[ispin][ilm]) - dos["pdos"] += [site_res_pdos] - - if kpoints_opt: - for k, v in dos.items(): - setattr(self.kpoints_opt_props, k, v) - else: - for k, v in dos.items(): - setattr(self, k, v) - - @staticmethod - def _parse_eigen(eigenvalues_complete: dict, ispin: int | None = None, nb_tot: int | None = None): # type: ignore[override] - eigenvalues = {} - ispin = ispin or eigenvalues_complete["ispin"] - nb_tot = nb_tot or eigenvalues_complete["nb_tot"] - nkpoints = eigenvalues_complete.get("kpoints") or len(eigenvalues_complete.get("kpoint_coords", [])) - for i_spin in range(ispin): - eigenvalues[Spin.up if i_spin == 0 else Spin.down] = np.array( - [ - [ - [ - eigenvalues_complete["eigenvalues"][i_spin][i][j], - eigenvalues_complete["fermiweights"][i_spin][i][j], - ] - for j in range(nb_tot) - ] - for i in range(nkpoints) - ] - ) - return eigenvalues - - @staticmethod - def _parse_bandgap_props(band_props: dict[str, list]) -> dict[str, dict[str, BandgapProps]] | None: - """ - Parse the bandgap properties VASP calculates. - - These are a dict with the following keys: - gap_from_kpoint, gap_from_weight, labels - - `labels` are the corresponding labels of the values in gap_from_* - - The lists in gap_from_* have shape (1, ISPIN + 1, len(band_props['labels']) ) - The specific order of spins in the middle index are: total, spin-up, spin-down - - - """ - - known_gap_keys = ("gap_from_kpoint", "gap_from_weight") - spin_keys: tuple[str, ...] = tuple() - for ref_k in known_gap_keys: - if bps := band_props.get(ref_k, []): - if len(bps[0]) == 1: - spin_keys = ("total",) - elif len(bps[0]) == 3: - spin_keys = ( - "total", - Spin.up.name, - Spin.down.name, - ) - else: - warnings.warn(f"Unknown bandgap property shape {len(band_props)} in vaspout.h5.", stacklevel=2) - break - - if len(spin_keys) == 0: - return None - - bg_props: dict[str, dict[str, BandgapProps]] = {} - for k in [x for x in known_gap_keys if band_props.get(x)]: - bg_props[k] = {} - for i, spin in enumerate(spin_keys): - vals = {label: band_props[k][0][i][idx] for idx, label in enumerate(band_props["labels"])} - - props = { - "vbm": vals.get("valence band maximum"), - "cbm": vals.get("conduction band minimum"), - } - - if all(dgv := tuple(vals.get(x) for x in ("direct gap bottom", "direct gap top"))): - props["direct_gap_eigenvalues"] = dgv - - for band_pos, band_pos_label in { - "VBM": "vbm", - "CBM": "cbm", - "direct": "direct_gap", - }.items(): - if all(kv := tuple(vals.get(f"{x} ({band_pos})") for x in ("kx", "ky", "kz"))): - props[f"{band_pos_label}_k"] = kv - bg_props[k][spin] = BandgapProps(**props) - - return bg_props - - @property - @unitized("eV") - def final_energy(self): - """Final energy from vaspout.""" - return self.ionic_steps[-1]["e_0_energy"] - - def remove_potcar_and_write_file( - self, filename: str | Path | None = None, fake_potcar_str: str | None = None - ) -> None: - """ - Utility function to replace the full POTCAR with its spec, and write a vaspout.h5. - - This is needed for applications where one might upload VASP output - to a public database. Since vaspout.h5 includes the full POTCAR, it's necessary - to replace it here with just the spec. - - Args: - filename : str, Path, or None (default) - Name of the output file. If None, defaults to self.filename (in-place modification). - fake_potcar_str : str or None (default) - If a str, a POTCAR represented as a str. Used in the context of tests to replace - a POTCAR with a scrambled/fake POTCAR. If None, the Vaspout.potcar Field - ("/input/potcar/content" field of vaspout.h5) is removed. - """ - import json - - def recursive_to_dataset(h5_obj, level, obj): - if hasattr(obj, "items"): - if level != "/": - h5_obj.create_group(level) - for k, v in obj.items(): - recursive_to_dataset(h5_obj[level], k, v) - else: - if isinstance(obj, str): - obj = obj.encode() - data = np.array(obj) - if "U" in str(data.dtype): - data = data.astype("S") - h5_obj.create_dataset(level, data=data) - - filename = filename or self.filename - _, fname_ext = os.path.splitext(filename) # type: ignore[type-var] - - # determine if output file is to be compressed - is_compressed = fname_ext.lower() in {".bz2", ".gz", ".z", ".xz", ".lzma"} - - with zopen(self.filename, "rb") as vout_file, h5py.File(vout_file, "r") as h5_file: - hdf5_data = self._parse_hdf5_value(h5_file) - - if fake_potcar_str: - hdf5_data["input"]["potcar"]["content"] = fake_potcar_str - potcar_spec = [psingle.spec() for psingle in Potcar.from_str(fake_potcar_str)] - else: - del hdf5_data["input"]["potcar"]["content"] - potcar_spec = self.potcar_spec - - # rather than define custom HDF5 hierarchy for POTCAR spec, just dump JSONable dict to str - hdf5_data["input"]["potcar"]["spec"] = json.dumps(potcar_spec) - - # if file is to be compressed, first write uncompressed file - with h5py.File(filename, "w") as h5_file: - recursive_to_dataset(h5_file, "/", hdf5_data) - - # now compress the file - if is_compressed: - with open(filename, "rb") as fin: - byte_data = fin.read() - with zopen(filename, "wb") as fout: - fout.write(byte_data)