Skip to content

Commit a188161

Browse files
refactor for parquet friendliness
1 parent c16179e commit a188161

File tree

4 files changed

+95
-82
lines changed

4 files changed

+95
-82
lines changed

pymatgen/io/validation/check_common_errors.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ def _check_electronic_convergence(self, vasp_files: VaspFiles, reasons: list[str
8585
):
8686
# Response function calculations are non-self-consistent: only one ionic step, no electronic SCF
8787
if vasp_files.user_input.incar.get("LEPSILON", self.vasp_defaults["LEPSILON"].value):
88-
final_esteps = vasp_files.vasprun.ionic_steps[-1]["electronic_steps"]
88+
final_esteps = vasp_files.vasprun.ionic_steps[-1].electronic_steps
8989
to_check = {"e_wo_entrp", "e_fr_energy", "e_0_energy"}
9090

9191
for i in range(len(final_esteps)):
@@ -98,7 +98,7 @@ def _check_electronic_convergence(self, vasp_files: VaspFiles, reasons: list[str
9898

9999
else:
100100
conv_steps = [
101-
len(ionic_step["electronic_steps"])
101+
len(ionic_step.electronic_steps)
102102
< vasp_files.user_input.incar.get("NELM", self.vasp_defaults["NELM"].value)
103103
for ionic_step in vasp_files.vasprun.ionic_steps
104104
]
@@ -190,7 +190,7 @@ def _check_scf_grad(self, vasp_files: VaspFiles, reasons: list[str], warnings: l
190190

191191
skip = abs(vasp_files.user_input.incar.get("NELMDL", self.vasp_defaults["NELMDL"].value)) - 1
192192

193-
energies = [d["e_fr_energy"] for d in vasp_files.vasprun.ionic_steps[-1]["electronic_steps"]]
193+
energies = [d.e_fr_energy for d in vasp_files.vasprun.ionic_steps[-1].electronic_steps]
194194
if len(energies) > skip:
195195
cur_max_gradient = np.max(np.gradient(energies)[skip:])
196196
cur_max_gradient_per_atom = cur_max_gradient / vasp_files.user_input.structure.num_sites

pymatgen/io/validation/check_incar.py

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -431,7 +431,7 @@ def _update_smearing_params(self, user_incar: dict, ref_incar: dict, vasp_files:
431431
user_incar["ELECTRONIC ENTROPY"] = -1e20
432432
if vasp_files.vasprun:
433433
for ionic_step in vasp_files.vasprun.ionic_steps:
434-
if eentropy := ionic_step["electronic_steps"][-1].get("eentropy"):
434+
if eentropy := ionic_step.electronic_steps[-1].eentropy:
435435
user_incar["ELECTRONIC ENTROPY"] = max(
436436
user_incar["ELECTRONIC ENTROPY"],
437437
abs(eentropy / vasp_files.user_input.structure.num_sites),
@@ -552,9 +552,7 @@ def _update_ionic_params(self, user_incar: dict, ref_incar: dict, vasp_files: Va
552552
]:
553553
ref_incar["IBRION"].append(inp_set_ibrion)
554554

555-
ionic_steps = []
556-
if vasp_files.vasprun is not None:
557-
ionic_steps = vasp_files.vasprun.ionic_steps
555+
ionic_steps = vasp_files.vasprun.ionic_steps if vasp_files.vasprun else []
558556

559557
# POTIM.
560558
if user_incar["IBRION"] in [1, 2, 3, 5, 6]:
@@ -567,7 +565,7 @@ def _update_ionic_params(self, user_incar: dict, ref_incar: dict, vasp_files: Va
567565
if len(ionic_steps) > 1:
568566
# Do not use `e_0_energy`, as there is a bug in the vasprun.xml when printing that variable
569567
# (see https://www.vasp.at/forum/viewtopic.php?t=16942 for more details).
570-
cur_ionic_step_energies = [ionic_step["e_fr_energy"] for ionic_step in ionic_steps]
568+
cur_ionic_step_energies = [ionic_step.e_fr_energy for ionic_step in ionic_steps]
571569
cur_ionic_step_energy_gradient = np.diff(cur_ionic_step_energies)
572570
user_incar["MAX ENERGY GRADIENT"] = round(
573571
max(np.abs(cur_ionic_step_energy_gradient)) / vasp_files.user_input.structure.num_sites,
@@ -606,14 +604,14 @@ def _update_ionic_params(self, user_incar: dict, ref_incar: dict, vasp_files: Va
606604
f"to |EDIFFG|={abs(ref_incar['EDIFFG'])} (or smaller in magnitude)."
607605
)
608606

609-
if ionic_steps[-1].get("forces") is None:
607+
if not ionic_steps[-1].forces:
610608
self.vasp_defaults["EDIFFG"].comment = (
611609
"vasprun.xml does not contain forces, cannot check force convergence."
612610
)
613611
self.vasp_defaults["EDIFFG"].severity = "warning"
614612
self.vasp_defaults["EDIFFG"].operation = "auto fail"
615613

616-
elif ref_incar["EDIFFG"] < 0.0 and (vrun_forces := ionic_steps[-1].get("forces")) is not None:
614+
elif ref_incar["EDIFFG"] < 0.0 and (vrun_forces := ionic_steps[-1].forces):
617615
user_incar["EDIFFG"] = round(
618616
max([np.linalg.norm(force_on_atom) for force_on_atom in vrun_forces]),
619617
3,
@@ -630,9 +628,7 @@ def _update_ionic_params(self, user_incar: dict, ref_incar: dict, vasp_files: Va
630628

631629
# the latter two checks just ensure the code does not error by indexing out of range
632630
elif ref_incar["EDIFFG"] > 0.0 and vasp_files.vasprun and len(ionic_steps) > 1:
633-
energy_of_last_step = ionic_steps[-1]["e_0_energy"]
634-
energy_of_second_to_last_step = ionic_steps[-2]["e_0_energy"]
635-
user_incar["EDIFFG"] = abs(energy_of_last_step - energy_of_second_to_last_step)
631+
user_incar["EDIFFG"] = abs(ionic_steps[-1].e_0_energy - ionic_steps[-2].e_0_energy)
636632
self.vasp_defaults["EDIFFG"].operation = "<="
637633
self.vasp_defaults["EDIFFG"].alias = "ENERGY CHANGE BETWEEN LAST TWO IONIC STEPS"
638634

pymatgen/io/validation/common.py

Lines changed: 64 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,12 @@
55
from functools import cached_property
66
import hashlib
77
from importlib import import_module
8+
import json
89
from monty.serialization import loadfn
910
import os
10-
import numpy as np
1111
from pathlib import Path
12-
from pydantic import BaseModel, Field, model_validator, model_serializer, PrivateAttr
13-
from typing import TYPE_CHECKING, Any, Optional
12+
from pydantic import BaseModel, Field, model_validator, model_serializer, PrivateAttr, PlainSerializer, BeforeValidator
13+
from typing import TYPE_CHECKING, Any, Annotated, TypeAlias
1414

1515
from pymatgen.core import Structure
1616
from pymatgen.io.vasp.inputs import POTCAR_STATS_PATH, Incar, Kpoints, Poscar, Potcar, PmgVaspPspDirError
@@ -22,10 +22,38 @@
2222

2323
if TYPE_CHECKING:
2424
from typing_extensions import Self
25+
from monty.json import MSONable
2526

2627
SETTINGS = IOValidationSettings()
2728

2829

30+
def _msonable_from_str(obj: Any, cls: type[MSONable]) -> MSONable:
31+
if isinstance(obj, str):
32+
obj = json.loads(obj)
33+
if isinstance(obj, dict):
34+
return cls.from_dict(obj)
35+
return obj
36+
37+
38+
def _msonable_to_json(obj: Any) -> str:
    """Dump an object exposing ``as_dict`` to a JSON string."""
    return json.dumps(obj.as_dict())


# Each alias below couples a pymatgen class with:
#   * a pre-validation hook that routes the raw value through `_msonable_from_str`, and
#   * a serializer that emits the object's `as_dict()` output as a JSON string.
IncarType: TypeAlias = Annotated[
    Incar,
    BeforeValidator(lambda raw: _msonable_from_str(raw, Incar)),
    PlainSerializer(_msonable_to_json, return_type=str),
]

KpointsType: TypeAlias = Annotated[
    Kpoints,
    BeforeValidator(lambda raw: _msonable_from_str(raw, Kpoints)),
    PlainSerializer(_msonable_to_json, return_type=str),
]

StructureType: TypeAlias = Annotated[
    Structure,
    BeforeValidator(lambda raw: _msonable_from_str(raw, Structure)),
    PlainSerializer(_msonable_to_json, return_type=str),
]
55+
56+
2957
class ValidationError(Exception):
3058
"""Define custom exception during validation."""
3159

@@ -62,8 +90,8 @@ class PotcarSummaryStatistics(BaseModel):
6290
class PotcarSummaryStats(BaseModel):
6391
"""Schematize `PotcarSingle._summary_stats`."""
6492

65-
keywords: Optional[PotcarSummaryKeywords] = None
66-
stats: Optional[PotcarSummaryStatistics] = None
93+
keywords: PotcarSummaryKeywords | None = None
94+
stats: PotcarSummaryStatistics | None = None
6795
titel: str
6896
lexch: str
6997

@@ -80,23 +108,39 @@ def from_file(cls, potcar_path: os.PathLike | Potcar) -> list[Self]:
80108
class LightOutcar(BaseModel):
81109
"""Schematic of pymatgen's Outcar."""
82110

83-
drift: Optional[list[list[float]]] = Field(None, description="The drift forces.")
84-
magnetization: Optional[list[dict[str, float]]] = Field(
111+
drift: list[list[float]] | None = Field(None, description="The drift forces.")
112+
magnetization: list[dict[str, float]] | None = Field(
85113
None, description="The on-site magnetic moments, possibly with orbital resolution."
86114
)
87115

88116

117+
class LightElectronicStep(BaseModel):
    """Lightweight schema for a single electronic step within an ionic step.

    Every field is optional and defaults to ``None`` so that steps which
    do not report a given quantity can still be represented.
    """

    e_0_energy: float | None = Field(None, description="The energy extrapolated to zero smearing (sigma -> 0) in eV.")
    e_fr_energy: float | None = Field(None, description="The free energy in eV.")
    e_wo_entrp: float | None = Field(None, description="The energy without the entropy term in eV.")
    eentropy: float | None = Field(None, description="The electronic entropy term (T*S) in eV.")
123+
124+
125+
class LightIonicStep(BaseModel):
    """Lightweight schema for a single ionic step of a calculation.

    Every field is optional and defaults to ``None`` so that steps which
    do not report a given quantity can still be represented.
    """

    e_0_energy: float | None = Field(None, description="The energy extrapolated to zero smearing (sigma -> 0) in eV.")
    e_fr_energy: float | None = Field(None, description="The free energy in eV.")
    forces: list[list[float]] | None = Field(
        None, description="The forces at this ionic step, one vector per atom."
    )
    electronic_steps: list[LightElectronicStep] | None = Field(
        None, description="The electronic steps performed within this ionic step."
    )
131+
132+
89133
class LightVasprun(BaseModel):
90134
"""Lightweight version of pymatgen Vasprun."""
91135

92136
vasp_version: str = Field(description="The dot-separated version of VASP used.")
93-
ionic_steps: list[dict[str, Any]] = Field(description="The ionic steps in the calculation.")
94137
final_energy: float = Field(description="The final total energy in eV.")
95-
final_structure: Structure = Field(description="The final structure.")
96-
kpoints: Kpoints = Field(description="The actual k-points used in the calculation.")
97-
parameters: dict[str, Any] = Field(description="The default-padded input parameters interpreted by VASP.")
138+
final_structure: StructureType = Field(description="The final structure.")
139+
kpoints: KpointsType = Field(description="The actual k-points used in the calculation.")
140+
parameters: IncarType = Field(description="The default-padded input parameters interpreted by VASP.")
98141
bandgap: float = Field(description="The bandgap - note that this field is derived from the Vasprun object.")
99-
potcar_symbols: Optional[list[str]] = Field(
142+
ionic_steps: list[LightIonicStep] = Field([], description="The ionic steps in the calculation.")
143+
potcar_symbols: list[str] | None = Field(
100144
None,
101145
description="Optional: if a POTCAR is unavailable, this is used to determine the functional used in the calculation.",
102146
)
@@ -119,45 +163,18 @@ def from_vasprun(cls, vasprun: Vasprun) -> Self:
119163
bandgap=vasprun.get_band_structure(efermi="smart").get_band_gap()["energy"],
120164
)
121165

122-
@model_serializer
123-
def deserialize_objects(self) -> dict[str, Any]:
124-
"""Ensure all pymatgen objects are deserialized."""
125-
model_dumped = {k: getattr(self, k) for k in self.__class__.model_fields}
126-
for k in ("final_structure", "kpoints"):
127-
model_dumped[k] = model_dumped[k].as_dict()
128-
for iion, istep in enumerate(model_dumped["ionic_steps"]):
129-
if (istruct := istep.get("structure")) and isinstance(istruct, Structure):
130-
model_dumped["ionic_steps"][iion]["structure"] = istruct.as_dict()
131-
for k in ("forces", "stress"):
132-
if (val := istep.get(k)) is not None and isinstance(val, np.ndarray):
133-
model_dumped["ionic_steps"][iion][k] = val.tolist()
134-
return model_dumped
135-
136166

137167
class VaspInputSafe(BaseModel):
138168
"""Stricter VaspInputSet with no POTCAR info."""
139169

140-
incar: Incar = Field(description="The INCAR used in the calculation.")
141-
structure: Structure = Field(description="The structure associated with the calculation.")
142-
kpoints: Optional[Kpoints] = Field(None, description="The optional KPOINTS or IBZKPT file used in the calculation.")
143-
potcar: Optional[list[PotcarSummaryStats]] = Field(None, description="The optional POTCAR used in the calculation.")
144-
potcar_functional: Optional[str] = Field(None, description="The pymatgen-labelled POTCAR library release.")
145-
_pmg_vis: Optional[VaspInputSet] = PrivateAttr(None)
146-
147-
@model_serializer
148-
def deserialize_objects(self) -> dict[str, Any]:
149-
"""Ensure all pymatgen objects are deserialized."""
150-
model_dumped: dict[str, Any] = {}
151-
if self.potcar:
152-
model_dumped["potcar"] = [p.model_dump() for p in self.potcar]
153-
for k in (
154-
"incar",
155-
"structure",
156-
"kpoints",
157-
):
158-
if pmg_obj := getattr(self, k):
159-
model_dumped[k] = pmg_obj.as_dict()
160-
return model_dumped
170+
incar: IncarType = Field(description="The INCAR used in the calculation.")
171+
structure: StructureType = Field(description="The structure associated with the calculation.")
172+
kpoints: KpointsType | None = Field(
173+
None, description="The optional KPOINTS or IBZKPT file used in the calculation."
174+
)
175+
potcar: list[PotcarSummaryStats] | None = Field(None, description="The optional POTCAR used in the calculation.")
176+
potcar_functional: str | None = Field(None, description="The pymatgen-labelled POTCAR library release.")
177+
_pmg_vis: VaspInputSet | None = PrivateAttr(None)
161178

162179
@classmethod
163180
def from_vasp_input_set(cls, vis: VaspInputSet) -> Self:

tests/test_validation.py

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from pymatgen.io.vasp import Kpoints
77

88
from pymatgen.io.validation.validation import VaspValidator
9-
from pymatgen.io.validation.common import ValidationError, VaspFiles, PotcarSummaryStats
9+
from pymatgen.io.validation.common import ValidationError, VaspFiles, PotcarSummaryStats, LightIonicStep
1010

1111
from conftest import vasp_calc_data, incar_check_list, set_fake_potcar_dir
1212

@@ -29,9 +29,9 @@ def run_check(
2929
validation_doc_kwargs: dict = {}, # any kwargs to pass to the VaspValidator class
3030
):
3131
_new_vf = vasp_files.model_dump()
32-
_new_vf["vasprun"]["parameters"].update(**vasprun_parameters_to_change)
32+
_new_vf["vasprun"]["parameters"] = {**vasp_files.vasprun.parameters, **vasprun_parameters_to_change}
3333

34-
_new_vf["user_input"]["incar"].update(**incar_settings_to_change)
34+
_new_vf["user_input"]["incar"] = {**vasp_files.user_input.incar, **incar_settings_to_change}
3535

3636
validator = VaspValidator.from_vasp_input(vasp_files=VaspFiles(**_new_vf), **validation_doc_kwargs)
3737
has_specified_error = any([error_message_to_search_for in reason for reason in validator.reasons])
@@ -127,40 +127,38 @@ def test_scf_incar_checks(test_dir, object_name):
127127
# POTIM check #2 (checks energy change between steps)
128128
vf = copy.deepcopy(vf_og)
129129
vf.user_input.incar["IBRION"] = 2
130-
temp_ionic_step_1 = copy.deepcopy(vf.vasprun.ionic_steps[0])
131-
temp_ionic_step_2 = copy.deepcopy(temp_ionic_step_1)
132-
temp_ionic_step_1["e_fr_energy"] = 0
133-
temp_ionic_step_2["e_fr_energy"] = 10000
134130
vf.vasprun.ionic_steps = [
135-
temp_ionic_step_1,
136-
temp_ionic_step_2,
131+
LightIonicStep(
132+
e_fr_energy=energy,
133+
**{k: v for k, v in vf.vasprun.ionic_steps[0].model_dump().items() if k != "e_fr_energy"},
134+
)
135+
for energy in [0, 1e4]
137136
]
138137
run_check(vf, "POTIM", False)
139138

140139
# EDIFFG energy convergence check (this check SHOULD fail)
141140
vf = copy.deepcopy(vf_og)
142-
temp_ionic_step_1 = copy.deepcopy(vf.vasprun.ionic_steps[0])
143-
temp_ionic_step_2 = copy.deepcopy(temp_ionic_step_1)
144-
temp_ionic_step_1["e_0_energy"] = -1
145-
temp_ionic_step_2["e_0_energy"] = -2
146141
vf.vasprun.ionic_steps = [
147-
temp_ionic_step_1,
148-
temp_ionic_step_2,
142+
LightIonicStep(
143+
e_0_energy=energy, **{k: v for k, v in vf.vasprun.ionic_steps[0].model_dump().items() if k != "e_0_energy"}
144+
)
145+
for energy in [-1, -2]
149146
]
150147
run_check(vf, "ENERGY CHANGE BETWEEN LAST TWO IONIC STEPS", False)
148+
return
151149

152150
# EDIFFG / force convergence check (the MP input set for R2SCAN has force convergence criteria)
153151
# (the below test should NOT fail, because final forces are 0)
154152
vf = copy.deepcopy(vf_og)
155153
vf.user_input.incar.update(METAGGA="R2SCA", ICHARG=1)
156-
vf.vasprun.ionic_steps[-1]["forces"] = [[0, 0, 0], [0, 0, 0]]
154+
vf.vasprun.ionic_steps[-1].forces = [[0, 0, 0], [0, 0, 0]]
157155
run_check(vf, "MAX FINAL FORCE MAGNITUDE", True)
158156

159157
# EDIFFG / force convergence check (the MP input set for R2SCAN has force convergence criteria)
160158
# (the below test SHOULD fail, because final forces are high)
161159
vf = copy.deepcopy(vf_og)
162160
vf.user_input.incar.update(METAGGA="R2SCA", ICHARG=1, IBRION=1, NSW=1)
163-
vf.vasprun.ionic_steps[-1]["forces"] = [[10, 10, 10], [10, 10, 10]]
161+
vf.vasprun.ionic_steps[-1].forces = [[10, 10, 10], [10, 10, 10]]
164162
run_check(vf, "MAX FINAL FORCE MAGNITUDE", False)
165163

166164
# ISMEAR wrong for nonmetal check
@@ -195,7 +193,7 @@ def test_scf_incar_checks(test_dir, object_name):
195193

196194
# SIGMA too large check (i.e. eentropy term is > 1 meV/atom)
197195
vf = copy.deepcopy(vf_og)
198-
vf.vasprun.ionic_steps[0]["electronic_steps"][-1]["eentropy"] = 1
196+
vf.vasprun.ionic_steps[0].electronic_steps[-1].eentropy = 1
199197
run_check(vf, "The entropy term (T*S)", False)
200198

201199
# LMAXMIX check for SCF calc
@@ -315,10 +313,12 @@ def test_common_error_checks(object_name):
315313
# METAGGA and GGA tag check (should never be set together)
316314
with pytest.raises(ValidationError):
317315
vfd = vf_og.model_dump()
318-
vfd["user_input"]["incar"].update(
319-
GGA="PE",
320-
METAGGA="R2SCAN",
321-
)
316+
vfd["user_input"]["incar"] = {
317+
**vf_og.user_input.incar,
318+
"GGA": "PE",
319+
"METAGGA": "R2SCAN",
320+
}
321+
322322
vf_new = VaspFiles(**vfd)
323323
vf_new.valid_input_set
324324

0 commit comments

Comments
 (0)