Skip to content

Commit 309caa5

Browse files
Add basic S3 object retrieval to the pymatgen user agent (#4438)
* add hacky s3 requests * undo qchem input change * status code change * correct type annotations for bandstructure objects * revert es kpoints typing - only phonon ones are wrong * more mypy * more mypy * revert typing changes
1 parent ba211c4 commit 309caa5

File tree

2 files changed

+60
-12
lines changed

2 files changed

+60
-12
lines changed

src/pymatgen/ext/matproj.py

Lines changed: 60 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
from __future__ import annotations
1111

12+
import gzip
1213
import itertools
1314
import json
1415
import logging
@@ -19,17 +20,21 @@
1920
from functools import partial
2021
from typing import TYPE_CHECKING, NamedTuple
2122

23+
import numpy as np
2224
import orjson
2325
import requests
2426
from monty.json import MontyDecoder
2527

26-
from pymatgen.core import SETTINGS
28+
from pymatgen.core import SETTINGS, Lattice
2729
from pymatgen.core import __version__ as PMG_VERSION
2830
from pymatgen.core.composition import Composition
31+
from pymatgen.electronic_structure.bandstructure import Kpoint
32+
from pymatgen.phonon import CompletePhononDos, PhononBandStructureSymmLine
2933
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
3034

3135
if TYPE_CHECKING:
3236
from collections.abc import Callable, Sequence
37+
from typing import Any
3338

3439
from typing_extensions import Self
3540

@@ -474,31 +479,77 @@ def get_entries_in_chemsys(self, elements: str | list[str], *args, **kwargs):
474479

475480
return self.get_entries(criteria, *args, **kwargs)
476481

477-
def get_phonon_bandstructure_by_material_id(self, material_id: str):
482+
def _retrieve_object_from_s3(self, material_id: str, bucket: str, prefix: str, timeout: float = 60) -> Any:
    """
    Retrieve data from Amazon S3 OpenData the non-canonical way, using requests.

    This should be transitioned to boto3 if long-term support is desired,
    or to expand pymatgen support of, e.g., electronic DOS, bandstructure, etc.

    The object is fetched from
    ``https://s3.us-east-1.amazonaws.com/{bucket}/{prefix}/{material_id}.json.gz``.

    Args:
        material_id (str): Materials Project material_id
        bucket (str): the Materials Project bucket, either materialsproject-parsed
            or materialsproject-build
        prefix (str): the prefix of the particular S3 key.
        timeout (float): timeout in seconds for the requests command. Defaults to 60.

    Returns:
        The object obtained by gunzipping the response body and parsing it as JSON.

    Raises:
        MPRestError: if the response status code is anything other than 200.
    """
    response = requests.get(
        f"https://s3.us-east-1.amazonaws.com/{bucket}/{prefix}/{material_id}.json.gz",
        timeout=timeout,
    )
    # Only a 200 response is treated as carrying the gzipped JSON payload. Any
    # error status (400 included) must be reported here rather than being passed
    # on to gzip.decompress, where it would surface as an unrelated gzip error.
    if response.status_code != 200:
        raise MPRestError(
            f"Failed to retrieve data from OpenData with status code {response.status_code}:\n{response.reason}"
        )
    return orjson.loads(gzip.decompress(response.content))
508+
509+
def get_phonon_bandstructure_by_material_id(self, material_id: str) -> PhononBandStructureSymmLine:
    """Get phonon bandstructure by material_id.

    Note that the construction below borrows from the constructor logic
    built into the emmet-core model for this data (the `to_pmg` method
    of the emmet-core data model).

    Args:
        material_id (str): Materials Project material_id

    Returns:
        PhononBandStructureSymmLine: A phonon band structure.
    """
    payload = self._retrieve_object_from_s3(
        material_id,
        bucket="materialsproject-parsed",
        prefix="ph-bandstructures/dfpt",
    )
    recip_lattice = Lattice(payload["reciprocal_lattice"])

    def _frac_coords(coords):
        # Route the coordinates through a Kpoint on the reciprocal lattice and
        # read back its fractional coordinates.
        return Kpoint(coords, lattice=recip_lattice).frac_coords

    qpoints = [_frac_coords(qpt) for qpt in payload["qpoints"]]
    frequencies = np.array(payload["frequencies"])
    has_nac = payload["has_nac"]
    eigendisplacements = np.array(payload["eigendisplacements"])
    # NOTE(review): the structure entry is handed over exactly as parsed from
    # the JSON payload - confirm the constructor accepts it in that form.
    structure = payload["structure"]
    # A missing/empty labels entry is treated as "no labels".
    labels = {name: _frac_coords(coords) for name, coords in (payload["labels_dict"] or {}).items()}

    return PhononBandStructureSymmLine(
        qpoints,
        frequencies,
        recip_lattice,
        has_nac=has_nac,
        eigendisplacements=eigendisplacements,
        structure=structure,
        labels_dict=labels,
        coords_are_cartesian=False,
    )
489536

490-
def get_phonon_dos_by_material_id(self, material_id: str):
537+
def get_phonon_dos_by_material_id(self, material_id: str) -> CompletePhononDos:
    """Get phonon density of states by material_id.

    Note that the construction below borrows from the constructor logic
    built into the emmet-core model for this data (the `to_pmg` method
    of the emmet-core data model).

    Args:
        material_id (str): Materials Project material_id

    Returns:
        CompletePhononDos: A phonon DOS object.
    """
    dos_dict = self._retrieve_object_from_s3(
        material_id,
        bucket="materialsproject-parsed",
        prefix="ph-dos/dfpt",
    )
    # Move the "projected_densities" entry (None when absent) to the "pdos" key
    # before handing the dict to CompletePhononDos.from_dict.
    projected = dos_dict.pop("projected_densities", None)
    dos_dict["pdos"] = projected
    return CompletePhononDos.from_dict(dos_dict)
502553

503554

504555
class MPRestError(Exception):

tests/ext/test_matproj.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -104,13 +104,10 @@ def test_get_entry_by_material_id(mprester):
104104
mprester.get_entry_by_material_id("mp-2022") # "mp-2022" does not exist
105105

106106

107-
@pytest.mark.skip(reason="MP staff broke the API: Jun 2025")
108107
def test_get_phonon_data_by_material_id(mprester):
    """Check that both phonon getters return the expected pymatgen types for mp-661."""
    material_id = "mp-661"

    band_structure = mprester.get_phonon_bandstructure_by_material_id(material_id)
    assert isinstance(band_structure, PhononBandStructureSymmLine)

    phonon_dos = mprester.get_phonon_dos_by_material_id(material_id)
    assert isinstance(phonon_dos, CompletePhononDos)
115112

116113

0 commit comments

Comments
 (0)