materialsproject
diff --git a/‎.github/workflows/testing.yml‎
Lines changed: 2 additions & 0 deletions b/‎.github/workflows/testing.yml‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎docs/dev/vasp_tests.md‎
Lines changed: 38 additions & 1 deletion b/‎docs/dev/vasp_tests.md‎
Lines changed: 38 additions & 1 deletion
diff --git a/‎pyproject.toml‎
Lines changed: 2 additions & 2 deletions b/‎pyproject.toml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎src/atomate2/aims/schemas/calculation.py‎
Lines changed: 7 additions & 6 deletions b/‎src/atomate2/aims/schemas/calculation.py‎
Lines changed: 7 additions & 6 deletions
diff --git a/‎src/atomate2/utils/testing/__init__.py‎
Lines changed: 2 additions & 0 deletions b/‎src/atomate2/utils/testing/__init__.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎src/atomate2/utils/testing/common.py‎
Lines changed: 40 additions & 0 deletions b/‎src/atomate2/utils/testing/common.py‎
Lines changed: 40 additions & 0 deletions
@@ -205,6 +205,8 @@ jobs:
         run: uv pip install --upgrade 'git+https://github.com/materialsproject/pymatgen@${{ github.event.client_payload.pymatgen_ref }}'
 
       - name: Test Notebooks
+        env:
+          MP_API_KEY: ${{ secrets.MP_API_KEY }}
         run: |
           micromamba activate a2
           pytest --nbmake ./tutorials --ignore=./tutorials/openmm_tutorial.ipynb --ignore=./tutorials/force_fields
 
@@ -146,6 +146,28 @@ name. For example, there cannot be two calculations called "relax". Instead you
 should ensure they are named something like "relax 1" and "relax 2".
 ```
 
+### 2a. Dealing with larger amounts of test data / directories.
+
+Some complex workflows and tutorials might require a larger-than-normal amount of test data to execute.
+You might also find that a branching workflow requires many steps, leading to a sprawling reference directory structure.
+In these cases, you may want to use the separate tools in `atomate2` that bundle your VASP test data into JSON-format archives.
+To do this for a single VASP calculation directory, you might run:
+
+```python
+from atomate2.utils.testing.vasp import VaspTestData
+
+vasp_test_data = VaspTestData.from_directory(
+    "/Users/alex/atomate2/job_2021-11-08-17-24-31-799852-28250"
+)
+vasp_test_data.to_file("tight_relax_1.json.lzma")
+```
+
+You can use any other compression method supported by `monty.io.zopen`, such as GZIP (`.gz`) or bzip2 (`.bz2`).
+LZMA is shown here because it generally offers the highest compression ratio of the three and is fairly quick to decompress, although compressing with it is CPU-intensive.
+
+The `VaspTestData` class handles removal of copyrighted POTCAR information by converting POTCAR files to POTCAR.spec files, as before.
+The test infrastructure using `mock_vasp` is also equipped to handle extraction of VASP files from a compressed JSON archive.
+
 ## 3. Copy the test data folder into atomate2
 
 You can now copy the WF_NAME folder into the atomate2 test files. VASP test files live
@@ -283,10 +305,25 @@ def test_elastic(mock_vasp, clean_dir):
     )
 ```
 
-Note that the `mock_vasp` and `clean_dir` arguments to the test function are
+<b>Note:</b> The `mock_vasp` and `clean_dir` arguments to the test function are
 [pytest fixtures](https://docs.pytest.org/en/6.2.x/fixture.html) and are essential
 for the test to run successfully.
 
+<b>Note:</b> If you used the `VaspTestData` method of creating JSON archives for the test data, you would use the following for `ref_paths`:
+
+```py
+ref_paths = {
+    "elastic relax 1/6": "Si_elastic/elastic_relax_1_6.json.lzma",
+    "elastic relax 2/6": "Si_elastic/elastic_relax_2_6.json.lzma",
+    "elastic relax 3/6": "Si_elastic/elastic_relax_3_6.json.lzma",
+    "elastic relax 4/6": "Si_elastic/elastic_relax_4_6.json.lzma",
+    "elastic relax 5/6": "Si_elastic/elastic_relax_5_6.json.lzma",
+    "elastic relax 6/6": "Si_elastic/elastic_relax_6_6.json.lzma",
+    "tight relax 1": "Si_elastic/tight_relax_1.json.lzma",
+    "tight relax 2": "Si_elastic/tight_relax_2.json.lzma",
+}
+```
+
 ```{warning}
 For `mock_vasp` to work correctly, all imports needed for the test must be
 imported in the test function itself (rather than at the top of the file).
 
@@ -109,7 +109,7 @@ strict = [
     "numpy",
     "phonopy==2.30.1",
     "pydantic-settings==2.7.0",
-    "pydantic==2.9.2",
+    "pydantic==2.11.1",
     "pymatgen-analysis-defects==2025.1.18",
     "pymatgen==2025.2.18",
     "pymongo==4.10.1",
@@ -123,7 +123,7 @@ strict-openff = [
     "monty==2025.3.3",
     "openmm-mdanalysis-reporter==0.1.0",
     "openmm==8.1.1",
-    "pymatgen==2025.3.10",
+    "pymatgen==2024.11.13", # TODO: open ff is extremely sensitive to pymatgen version
     "mdanalysis==2.7.0"
 ]
 strict-forcefields = [
 
@@ -7,10 +7,11 @@
 from collections.abc import Sequence
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Optional, Union
+from typing import TYPE_CHECKING, Any, Optional
 
 import numpy as np
 from ase.spectrum.band_structure import BandStructure
+from emmet.core.math import Matrix3D, Vector3D
 from jobflow.utils import ValueEnum
 from pydantic import BaseModel, Field
 from pymatgen.core import Molecule, Structure
@@ -19,10 +20,10 @@
 from pymatgen.io.aims.inputs import AimsGeometryIn
 from pymatgen.io.aims.outputs import AimsOutput
 from pymatgen.io.common import VolumetricData
-from typing_extensions import Self
 
 if TYPE_CHECKING:
-    from emmet.core.math import Matrix3D, Vector3D
+    from typing_extensions import Self
+
 
 STORE_VOLUMETRIC_DATA = ("total_density",)
 
@@ -93,7 +94,7 @@ class CalculationOutput(BaseModel):
         None, description="The final DFT energy per atom for the calculation"
     )
 
-    structure: Union[Structure, Molecule] = Field(
+    structure: Structure | Molecule = Field(
         None, description="The final structure from the calculation"
     )
 
@@ -127,7 +128,7 @@ class CalculationOutput(BaseModel):
         description="The valence band maximum, or HOMO for molecules, in eV "
         "(if system is not metallic)",
     )
-    atomic_steps: list[Union[Structure, Molecule]] = Field(
+    atomic_steps: list[Structure | Molecule] = Field(
         None, description="Structures for each ionic step"
     )
 
@@ -198,7 +199,7 @@ class CalculationInput(BaseModel):
         The parameters passed in the control.in file
     """
 
-    structure: Union[Structure, Molecule] = Field(
+    structure: Structure | Molecule = Field(
         None, description="The input structure object"
     )
     parameters: dict[str, Any] = Field(
 
@@ -8,3 +8,5 @@
 
 This module will hold the core logic for those tests.
 """
+
+from atomate2.utils.testing.common import get_job_uuid_name_map
@@ -0,0 +1,40 @@
+"""Define common testing utils used in atomate2."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from jobflow import Flow, Job, Response
+
+
+def get_job_uuid_name_map(job_flow_resp: Job | Flow | Response) -> dict[str, str]:
+    """
+    Get all job UUIDs and map them to the job name.
+
+    Useful for running complex flows locally / testing in CI, where one often
+    wants the output of a job with a specific name.
+
+    Parameters
+    ----------
+    job_flow_resp : jobflow Job, Flow, or Response
+
+    Returns
+    -------
+    dict mapping string UUIDs to string names.
+    """
+    uuid_to_name: dict[str, str] = {}
+
+    def recursive_get_job_names(
+        flow_like: Job | Flow, uuid_to_name: dict[str, str]
+    ) -> None:
+        if flow_jobs := getattr(flow_like, "jobs", None):
+            for job in flow_jobs:
+                recursive_get_job_names(job, uuid_to_name)
+        elif replacement := getattr(flow_like, "replace", None):
+            recursive_get_job_names(replacement, uuid_to_name)
+        else:
+            uuid_to_name[flow_like.uuid] = flow_like.name
+
+    recursive_get_job_names(job_flow_resp, uuid_to_name)
+    return uuid_to_name