materialsproject · esoteric-ephemera · May 29, 2025 · Apr 30, 2025 · Apr 30, 2025 · May 1, 2025
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml
@@ -25,7 +25,7 @@ jobs:
         python -m pip install types-requests
     - name: mypy
       run: |
-        mypy --namespace-packages --explicit-package-bases pymatgen
+        mypy pymatgen
     - name: black
       run: |
         black --version

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -52,4 +52,13 @@ repos:
         args:
           - --namespace-packages
           - --explicit-package-bases
-        additional_dependencies: ['types-requests']
+        additional_dependencies: ['types-requests','pydantic>=2.10.0']
+
+  - repo: https://github.com/kynan/nbstripout
+    rev: 0.8.1
+    hooks:
+      - id: nbstripout
+        args:
+          - --drop-empty-cells
+          - --strip-init-cells
+          - --extra-keys=metadata.kernelspec
diff --git a/README.md b/README.md
@@ -17,18 +17,11 @@ Usage
 
 For validating calculations from the raw files, run:
 ```
-from pymatgen.io.validation import ValidationDoc
-validation_doc = ValidationDoc.from_directory(dir_name = path_to_vasp_calculation_directory)
+from pymatgen.io.validation import VaspValidator
+validation_doc = VaspValidator.from_directory(path_to_vasp_calculation_directory)
 ```
 
 In the above case, whether a calculation passes the validator can be accessed via `validation_doc.valid`. Moreover, reasons for an invalidated calculation can be accessed via `validation_doc.reasons` (this will be empty for valid calculations). Last but not least, warnings for potential issues (sometimes minor, sometimes major) can be accessed via `validation_doc.warnings`.
-\
-\
-For validating calculations from `TaskDoc` objects from the [Emmet](https://github.com/materialsproject/emmet) package, run:
-```
-from pymatgen.io.validation import ValidationDoc
-validation_doc = ValidationDoc.from_task_doc(task_doc = my_task_doc)
-```
 
 Contributors
 =====

diff --git a/examples/using_validation_docs.ipynb b/examples/using_validation_docs.ipynb
@@ -2,39 +2,43 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
-   "id": "b116c8b3-e927-401b-aed8-994fe5279b54",
+   "execution_count": null,
+   "id": "0",
    "metadata": {},
    "outputs": [],
    "source": [
     "from __future__ import annotations\n",
     "\n",
-    "from emmet.core.tasks import TaskDoc\n",
+    "from monty.os.path import zpath\n",
     "from monty.serialization import loadfn\n",
     "import os\n",
+    "from pathlib import Path\n",
     "\n",
-    "from pymatgen.io.validation import ValidationDoc\n",
-    "from pymatgen.io.validation.check_potcar import CheckPotcar\n",
+    "from pymatgen.io.validation.validation import VaspValidator\n",
     "\n",
     "from pymatgen.io.vasp import PotcarSingle, Potcar"
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": 2,
-   "id": "80064515-1e98-43da-b075-5a4c41ede437",
+   "cell_type": "markdown",
+   "id": "1",
    "metadata": {},
-   "outputs": [],
    "source": [
-    "\"\"\"\n",
     "For copyright reasons, the POTCAR for these calculations cannot be distributed with this file, but its summary stats can.\n",
-    "If you have the POTCAR resources set up in pymatgen, you can regenerate the POTCARs used here by enabling `regen_potcars`\n",
-    "\"\"\"\n",
     "\n",
+    "If you have the POTCAR resources set up in pymatgen, you can regenerate the POTCARs used here by setting `regen_potcars = True`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
     "regen_potcars = True\n",
     "\n",
     "def get_potcar_from_spec(potcar_spec : dict) -> Potcar | None:\n",
-    "    potcar_checker = CheckPotcar()\n",
     "    \n",
     "    for functional in PotcarSingle._potcar_summary_stats:\n",
     "\n",
@@ -47,24 +51,37 @@
     "                \n",
     "            for stats in PotcarSingle._potcar_summary_stats[functional].get(titel_no_spc,[]):\n",
     "                \n",
-    "                if potcar_checker.compare_potcar_stats(spec[\"summary_stats\"], stats):\n",
+    "                if PotcarSingle.compare_potcar_stats(spec[\"summary_stats\"], stats):\n",
     "                    potcar.append(PotcarSingle.from_symbol_and_functional(symbol=symbol, functional=functional))\n",
     "                    matched[ispec] = True\n",
     "                    break\n",
     "                    \n",
     "            if all(matched):\n",
     "                return potcar\n",
     "    \n",
-    "def check_calc(calc_dir : str) -> ValidationDoc:\n",
+    "def check_calc(calc_dir : str | Path) -> VaspValidator:\n",
+    "\n",
+    "    calc_dir = Path(calc_dir)\n",
     "    potcar_filename = None\n",
     "    if regen_potcars:\n",
-    "        potcar = get_potcar_from_spec(loadfn(os.path.join(calc_dir,\"POTCAR.spec.gz\")))\n",
+    "        potcar = get_potcar_from_spec(loadfn(calc_dir / \"POTCAR.spec.gz\"))\n",
     "        if potcar:\n",
-    "            potcar_filename = os.path.join(calc_dir,\"POTCAR.gz\")\n",
+    "            potcar_filename = calc_dir / \"POTCAR.gz\"\n",
     "            potcar.write_file(potcar_filename)\n",
     "    \n",
-    "    valid_doc = ValidationDoc.from_directory(calc_dir, check_potcar=(regen_potcars and potcar))\n",
+    "    vasp_files = {\n",
+    "        k.lower().split(\".\")[0] : zpath(calc_dir / k) for k in (\n",
+    "            \"INCAR\",\"KPOINTS\",\"POSCAR\",\"POTCAR\",\"OUTCAR\", \"vasprun.xml\"\n",
+    "        )\n",
+    "    }\n",
     "    \n",
+    "    valid_doc = VaspValidator.from_vasp_input(\n",
+    "        vasp_file_paths={\n",
+    "            k : v for k,v in vasp_files.items() if Path(v).exists()\n",
+    "        },\n",
+    "        check_potcar=(regen_potcars and potcar)\n",
+    "    )\n",
+    "\n",
     "    if potcar_filename and potcar:\n",
     "        os.remove(potcar_filename)\n",
     "        \n",
@@ -73,107 +90,50 @@
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": 3,
-   "id": "0f660f54-ca8a-466c-b382-2f0fac46d8bf",
+   "cell_type": "markdown",
+   "id": "3",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "True\n"
-     ]
-    }
-   ],
    "source": [
-    "\"\"\"\n",
-    "An example of an MP-compatible r2SCAN static calculation for GaAs is located in the `MP_compliant` directory.\n",
-    "\"\"\"\n",
-    "mp_compliant_doc = check_calc(\"MP_compliant\")\n",
-    "print(mp_compliant_doc.valid)"
+    "An example of an MP-compatible r<sup>2</sup>SCAN static calculation for GaAs is located in the `MP_compliant` directory. We also include `TaskDoc` objects generated with `atomate2`, the workflow software currently used by the Materials Project (MP) for high-throughput calculations. A `TaskDoc` is also the document schema for the MP `task` collection."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
-   "id": "25b85de2",
+   "execution_count": null,
+   "id": "4",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "True\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
-    "\"\"\"\n",
-    "TaskDocs for these calculations (generated with atomate2) are also saved in these directories.\n",
-    "You can load in the TaskDocs like so:\n",
-    "\"\"\"\n",
-    "compliant_task_doc = TaskDoc(\n",
-    "    **loadfn(os.path.join(\"MP_compliant\",\"MP_compatible_GaAs_r2SCAN_static.json.gz\"))\n",
-    ")\n",
-    "mp_compliant_doc = ValidationDoc.from_task_doc(compliant_task_doc)\n",
+    "mp_compliant_doc = check_calc(\"MP_compliant\")\n",
     "print(mp_compliant_doc.valid)"
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": 5,
-   "id": "c919fedd-38ef-4cf7-a2ed-54544eec8d82",
+   "cell_type": "markdown",
+   "id": "5",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "False\n",
-      "INPUT SETTINGS --> KPOINTS or KSPACING: 64 kpoints were used, but it should have been at least 194.\n",
-      "INPUT SETTINGS --> ENAUG: is 900.0, but should be >= 1360.\n",
-      "INPUT SETTINGS --> ENCUT: is 450.0, but should be >= 680.\n",
-      "False\n",
-      "True\n"
-     ]
-    }
-   ],
    "source": [
-    "\"\"\"\n",
-    "An example of an MP incompatible r2SCAN static calculation for GaAs is located in the `MP_non_compliant` directory.\n",
+    "An example of an MP-incompatible r<sup>2</sup>SCAN static calculation for GaAs is located in the `MP_non_compliant` directory.\n",
     "\n",
     "This calculation uses a lower ENCUT, ENAUG, and k-point density (larger KSPACING) than is permitted by the appropriate input set, `pymatgen.io.vasp.sets.MPScanStaticSet`.\n",
-    "These reasons are reflected transparently in the output reasons.\n",
-    "\"\"\"\n",
-    "mp_non_compliant_doc = check_calc(\"MP_non_compliant\")\n",
-    "print(mp_non_compliant_doc.valid)\n",
-    "for reason in mp_non_compliant_doc.reasons:\n",
-    "    print(reason)\n",
-    "\n",
-    "non_compliant_task_doc = TaskDoc(\n",
-    "    **loadfn(os.path.join(\"MP_non_compliant\",\"MP_incompatible_GaAs_r2SCAN_static.json.gz\"))\n",
-    ")\n",
-    "mp_non_compliant_doc_from_taskdoc = ValidationDoc.from_task_doc(non_compliant_task_doc)\n",
-    "print(mp_non_compliant_doc_from_taskdoc.valid)\n",
-    "print(mp_non_compliant_doc_from_taskdoc.reasons == mp_non_compliant_doc_from_taskdoc.reasons)"
+    "These reasons are reflected transparently in the output reasons."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "128e49d1",
+   "id": "6",
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "mp_non_compliant_doc = check_calc(\"MP_non_compliant\")\n",
+    "print(mp_non_compliant_doc.valid)\n",
+    "for reason in mp_non_compliant_doc.reasons:\n",
+    "    print(reason)"
+   ]
   }
  ],
  "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
   "language_info": {
    "codemirror_mode": {
     "name": "ipython",

diff --git a/pymatgen/io/validation/__init__.py b/pymatgen/io/validation/__init__.py
@@ -4,11 +4,10 @@
 to ensure that data is compatible with some standard.
 """
 
-from pymatgen.io.validation.validation import ValidationDoc  # noqa: F401
+from pymatgen.io.validation.common import SETTINGS
+from pymatgen.io.validation.validation import VaspValidator  # noqa: F401
 
-from pymatgen.io.validation.settings import IOValidationSettings as _settings
-
-if _settings().CHECK_PYPI_AT_LOAD:
+if SETTINGS.CHECK_PYPI_AT_LOAD:
     # Only check version at module load time, if specified in module settings.
     from pymatgen.io.validation.check_package_versions import package_version_check