Skip to content

Commit f38e9c9

Browse files
Refactor to remove emmet dependence (#219)
* Remove emmet-core dependence to eliminate circular dependence in MP software stack, pending integration into emmet-archival namespace package * Lighter weight, fixed schema documents used during validation
1 parent 8d67699 commit f38e9c9

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

73 files changed

+3790
-2510
lines changed

.github/workflows/linting.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ jobs:
2525
python -m pip install types-requests
2626
- name: mypy
2727
run: |
28-
mypy --namespace-packages --explicit-package-bases pymatgen
28+
mypy pymatgen
2929
- name: black
3030
run: |
3131
black --version

.pre-commit-config.yaml

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,4 +52,13 @@ repos:
5252
args:
5353
- --namespace-packages
5454
- --explicit-package-bases
55-
additional_dependencies: ['types-requests']
55+
additional_dependencies: ['types-requests','pydantic>=2.10.0']
56+
57+
- repo: https://github.com/kynan/nbstripout
58+
rev: 0.8.1
59+
hooks:
60+
- id: nbstripout
61+
args:
62+
- --drop-empty-cells
63+
- --strip-init-cells
64+
- --extra-keys=metadata.kernelspec

README.md

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,18 +17,11 @@ Usage
1717

1818
For validating calculations from the raw files, run:
1919
```
20-
from pymatgen.io.validation import ValidationDoc
21-
validation_doc = ValidationDoc.from_directory(dir_name = path_to_vasp_calculation_directory)
20+
from pymatgen.io.validation import VaspValidator
21+
validation_doc = VaspValidator.from_directory(path_to_vasp_calculation_directory)
2222
```
2323

2424
Whether the calculation passes validation is available as `validation_doc.valid`. The reasons a calculation was marked invalid are listed in `validation_doc.reasons` (empty for valid calculations), and warnings about potential issues (sometimes minor, sometimes major) are listed in `validation_doc.warnings`.
25-
\
26-
\
27-
For validating calculations from `TaskDoc` objects from the [Emmet](https://github.com/materialsproject/emmet) package, run:
28-
```
29-
from pymatgen.io.validation import ValidationDoc
30-
validation_doc = ValidationDoc.from_task_doc(task_doc = my_task_doc)
31-
```
3225

3326
Contributors
3427
=====

examples/using_validation_docs.ipynb

Lines changed: 53 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -2,39 +2,43 @@
22
"cells": [
33
{
44
"cell_type": "code",
5-
"execution_count": 1,
6-
"id": "b116c8b3-e927-401b-aed8-994fe5279b54",
5+
"execution_count": null,
6+
"id": "0",
77
"metadata": {},
88
"outputs": [],
99
"source": [
1010
"from __future__ import annotations\n",
1111
"\n",
12-
"from emmet.core.tasks import TaskDoc\n",
12+
"from monty.os.path import zpath\n",
1313
"from monty.serialization import loadfn\n",
1414
"import os\n",
15+
"from pathlib import Path\n",
1516
"\n",
16-
"from pymatgen.io.validation import ValidationDoc\n",
17-
"from pymatgen.io.validation.check_potcar import CheckPotcar\n",
17+
"from pymatgen.io.validation.validation import VaspValidator\n",
1818
"\n",
1919
"from pymatgen.io.vasp import PotcarSingle, Potcar"
2020
]
2121
},
2222
{
23-
"cell_type": "code",
24-
"execution_count": 2,
25-
"id": "80064515-1e98-43da-b075-5a4c41ede437",
23+
"cell_type": "markdown",
24+
"id": "1",
2625
"metadata": {},
27-
"outputs": [],
2826
"source": [
29-
"\"\"\"\n",
3027
"For copyright reasons, the POTCAR for these calculations cannot be distributed with this file, but its summary stats can.\n",
31-
"If you have the POTCAR resources set up in pymatgen, you can regenerate the POTCARs used here by enabling `regen_potcars`\n",
32-
"\"\"\"\n",
3328
"\n",
29+
"If you have the POTCAR resources set up in pymatgen, you can regenerate the POTCARs used here by enabling `regen_potcars`"
30+
]
31+
},
32+
{
33+
"cell_type": "code",
34+
"execution_count": null,
35+
"id": "2",
36+
"metadata": {},
37+
"outputs": [],
38+
"source": [
3439
"regen_potcars = True\n",
3540
"\n",
3641
"def get_potcar_from_spec(potcar_spec : dict) -> Potcar | None:\n",
37-
" potcar_checker = CheckPotcar()\n",
3842
" \n",
3943
" for functional in PotcarSingle._potcar_summary_stats:\n",
4044
"\n",
@@ -47,24 +51,37 @@
4751
" \n",
4852
" for stats in PotcarSingle._potcar_summary_stats[functional].get(titel_no_spc,[]):\n",
4953
" \n",
50-
" if potcar_checker.compare_potcar_stats(spec[\"summary_stats\"], stats):\n",
54+
" if PotcarSingle.compare_potcar_stats(spec[\"summary_stats\"], stats):\n",
5155
" potcar.append(PotcarSingle.from_symbol_and_functional(symbol=symbol, functional=functional))\n",
5256
" matched[ispec] = True\n",
5357
" break\n",
5458
" \n",
5559
" if all(matched):\n",
5660
" return potcar\n",
5761
" \n",
58-
"def check_calc(calc_dir : str) -> ValidationDoc:\n",
62+
"def check_calc(calc_dir : str | Path) -> VaspValidator:\n",
63+
"\n",
64+
" calc_dir = Path(calc_dir)\n",
5965
" potcar_filename = None\n",
6066
" if regen_potcars:\n",
61-
" potcar = get_potcar_from_spec(loadfn(os.path.join(calc_dir,\"POTCAR.spec.gz\")))\n",
67+
" potcar = get_potcar_from_spec(loadfn(calc_dir / \"POTCAR.spec.gz\"))\n",
6268
" if potcar:\n",
63-
" potcar_filename = os.path.join(calc_dir,\"POTCAR.gz\")\n",
69+
" potcar_filename = calc_dir / \"POTCAR.gz\"\n",
6470
" potcar.write_file(potcar_filename)\n",
6571
" \n",
66-
" valid_doc = ValidationDoc.from_directory(calc_dir, check_potcar=(regen_potcars and potcar))\n",
72+
" vasp_files = {\n",
73+
" k.lower().split(\".\")[0] : zpath(calc_dir / k) for k in (\n",
74+
" \"INCAR\",\"KPOINTS\",\"POSCAR\",\"POTCAR\",\"OUTCAR\", \"vasprun.xml\"\n",
75+
" )\n",
76+
" }\n",
6777
" \n",
78+
" valid_doc = VaspValidator.from_vasp_input(\n",
79+
" vasp_file_paths={\n",
80+
" k : v for k,v in vasp_files.items() if Path(v).exists()\n",
81+
" },\n",
82+
" check_potcar=(regen_potcars and potcar)\n",
83+
" )\n",
84+
"\n",
6885
" if potcar_filename and potcar:\n",
6986
" os.remove(potcar_filename)\n",
7087
" \n",
@@ -73,107 +90,50 @@
7390
]
7491
},
7592
{
76-
"cell_type": "code",
77-
"execution_count": 3,
78-
"id": "0f660f54-ca8a-466c-b382-2f0fac46d8bf",
93+
"cell_type": "markdown",
94+
"id": "3",
7995
"metadata": {},
80-
"outputs": [
81-
{
82-
"name": "stdout",
83-
"output_type": "stream",
84-
"text": [
85-
"True\n"
86-
]
87-
}
88-
],
8996
"source": [
90-
"\"\"\"\n",
91-
"An example of an MP-compatible r2SCAN static calculation for GaAs is located in the `MP_compliant` directory.\n",
92-
"\"\"\"\n",
93-
"mp_compliant_doc = check_calc(\"MP_compliant\")\n",
94-
"print(mp_compliant_doc.valid)"
97+
"An example of a Materials Project (MP)-compatible r<sup>2</sup>SCAN static calculation for GaAs is located in the `MP_compliant` directory. We also include `TaskDoc` objects generated with `atomate2`, the workflow software currently used by MP for high-throughput calculations. A `TaskDoc` is also the document schema for the MP `task` collection."
9598
]
9699
},
97100
{
98101
"cell_type": "code",
99-
"execution_count": 4,
100-
"id": "25b85de2",
102+
"execution_count": null,
103+
"id": "4",
101104
"metadata": {},
102-
"outputs": [
103-
{
104-
"name": "stdout",
105-
"output_type": "stream",
106-
"text": [
107-
"True\n"
108-
]
109-
}
110-
],
105+
"outputs": [],
111106
"source": [
112-
"\"\"\"\n",
113-
"TaskDocs for these calculations (generated with atomate2) are also saved in these directories.\n",
114-
"You can load in the TaskDocs like so:\n",
115-
"\"\"\"\n",
116-
"compliant_task_doc = TaskDoc(\n",
117-
" **loadfn(os.path.join(\"MP_compliant\",\"MP_compatible_GaAs_r2SCAN_static.json.gz\"))\n",
118-
")\n",
119-
"mp_compliant_doc = ValidationDoc.from_task_doc(compliant_task_doc)\n",
107+
"mp_compliant_doc = check_calc(\"MP_compliant\")\n",
120108
"print(mp_compliant_doc.valid)"
121109
]
122110
},
123111
{
124-
"cell_type": "code",
125-
"execution_count": 5,
126-
"id": "c919fedd-38ef-4cf7-a2ed-54544eec8d82",
112+
"cell_type": "markdown",
113+
"id": "5",
127114
"metadata": {},
128-
"outputs": [
129-
{
130-
"name": "stdout",
131-
"output_type": "stream",
132-
"text": [
133-
"False\n",
134-
"INPUT SETTINGS --> KPOINTS or KSPACING: 64 kpoints were used, but it should have been at least 194.\n",
135-
"INPUT SETTINGS --> ENAUG: is 900.0, but should be >= 1360.\n",
136-
"INPUT SETTINGS --> ENCUT: is 450.0, but should be >= 680.\n",
137-
"False\n",
138-
"True\n"
139-
]
140-
}
141-
],
142115
"source": [
143-
"\"\"\"\n",
144-
"An example of an MP incompatible r2SCAN static calculation for GaAs is located in the `MP_non_compliant` directory.\n",
116+
"An example of an MP-incompatible r<sup>2</sup>SCAN static calculation for GaAs is located in the `MP_non_compliant` directory.\n",
145117
"\n",
146118
"This calculation uses a lower ENCUT, ENAUG, and k-point density (larger KSPACING) than is permitted by the appropriate input set, `pymatgen.io.vasp.sets.MPScanStaticSet`.\n",
147-
"These reasons are reflected transparently in the output reasons.\n",
148-
"\"\"\"\n",
149-
"mp_non_compliant_doc = check_calc(\"MP_non_compliant\")\n",
150-
"print(mp_non_compliant_doc.valid)\n",
151-
"for reason in mp_non_compliant_doc.reasons:\n",
152-
" print(reason)\n",
153-
"\n",
154-
"non_compliant_task_doc = TaskDoc(\n",
155-
" **loadfn(os.path.join(\"MP_non_compliant\",\"MP_incompatible_GaAs_r2SCAN_static.json.gz\"))\n",
156-
")\n",
157-
"mp_non_compliant_doc_from_taskdoc = ValidationDoc.from_task_doc(non_compliant_task_doc)\n",
158-
"print(mp_non_compliant_doc_from_taskdoc.valid)\n",
159-
"print(mp_non_compliant_doc_from_taskdoc.reasons == mp_non_compliant_doc_from_taskdoc.reasons)"
119+
"Each of these deficiencies is reported explicitly in the validation document's `reasons`."
160120
]
161121
},
162122
{
163123
"cell_type": "code",
164124
"execution_count": null,
165-
"id": "128e49d1",
125+
"id": "6",
166126
"metadata": {},
167127
"outputs": [],
168-
"source": []
128+
"source": [
129+
"mp_non_compliant_doc = check_calc(\"MP_non_compliant\")\n",
130+
"print(mp_non_compliant_doc.valid)\n",
131+
"for reason in mp_non_compliant_doc.reasons:\n",
132+
" print(reason)"
133+
]
169134
}
170135
],
171136
"metadata": {
172-
"kernelspec": {
173-
"display_name": "Python 3 (ipykernel)",
174-
"language": "python",
175-
"name": "python3"
176-
},
177137
"language_info": {
178138
"codemirror_mode": {
179139
"name": "ipython",

pymatgen/io/validation/__init__.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,10 @@
44
to ensure that data is compatible with some standard.
55
"""
66

7-
from pymatgen.io.validation.validation import ValidationDoc # noqa: F401
7+
from pymatgen.io.validation.common import SETTINGS
8+
from pymatgen.io.validation.validation import VaspValidator # noqa: F401
89

9-
from pymatgen.io.validation.settings import IOValidationSettings as _settings
10-
11-
if _settings().CHECK_PYPI_AT_LOAD:
10+
if SETTINGS.CHECK_PYPI_AT_LOAD:
1211
# Only check version at module load time, if specified in module settings.
1312
from pymatgen.io.validation.check_package_versions import package_version_check
1413

0 commit comments

Comments
 (0)