Skip to content

Commit f9f2508

Browse files
authored
Add CifParser.get_structures(on_error='warn') (#3175)
* simplify key in dict check to dict.get(key) * better var names * CifParser.get_structures() add kwarg on_error: Literal["ignore", "warn", "raise"] = "warn"
1 parent 5cc93fe commit f9f2508

File tree

8 files changed

+65
-67
lines changed

8 files changed

+65
-67
lines changed

pymatgen/core/composition.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -856,16 +856,16 @@ def add_charges_from_oxi_state_guesses(
856856

857857
def remove_charges(self) -> Composition:
858858
"""
859-
Removes the charges from any species in a Composition object.
859+
Returns a new Composition with charges from each Species removed.
860860
861861
Returns:
862862
Composition object without charge decoration, for example
863863
{"Fe3+": 2.0, "O2-":3.0} becomes {"Fe": 2.0, "O":3.0}
864864
"""
865-
d: dict[Element, float] = collections.defaultdict(float)
866-
for e, a in self.items():
867-
d[Element(e.symbol)] += a
868-
return Composition(d)
865+
dct: dict[Element, float] = collections.defaultdict(float)
866+
for specie, amt in self.items():
867+
dct[Element(specie.symbol)] += amt
868+
return Composition(dct)
869869

870870
def _get_oxid_state_guesses(self, all_oxi_states, max_sites, oxi_states_override, target_charge):
871871
"""

pymatgen/core/ion.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ def get_reduced_formula_and_factor(self, iupac_ordering: bool = False, hydrates:
142142
d = {k: int(round(v)) for k, v in comp.get_el_amt_dict().items()}
143143
(formula, factor) = reduce_formula(d, iupac_ordering=iupac_ordering)
144144

145-
if "HO" in formula and self.composition["H"] == self.composition["O"]:
145+
if self.composition.get("H") == self.composition.get("O") is not None:
146146
formula = formula.replace("HO", "OH")
147147

148148
if nH2O > 0:

pymatgen/electronic_structure/bandstructure.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -681,7 +681,7 @@ def from_old_dict(cls, dct):
681681
labels_dict = {k.strip(): v for k, v in dct["labels_dict"].items()}
682682
projections = {}
683683
structure = None
684-
if "projections" in dct and len(dct["projections"]) != 0:
684+
if dct.get("projections"):
685685
structure = Structure.from_dict(dct["structure"])
686686
projections = {}
687687
for spin in dct["projections"]:

pymatgen/ext/optimade.py

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -320,15 +320,13 @@ def get_snls_with_filter(
320320
pbar = tqdm(total=json["meta"].get("data_returned", 0), desc=identifier, initial=len(structures))
321321

322322
# TODO: check spec for `more_data_available` boolean, may simplify this conditional
323-
if ("links" in json) and ("next" in json["links"]) and (json["links"]["next"]):
324-
while "next" in json["links"] and json["links"]["next"]:
325-
next_link = json["links"]["next"]
326-
if isinstance(next_link, dict) and "href" in next_link:
327-
next_link = next_link["href"]
328-
json = self._get_json(next_link)
329-
additional_structures = self._get_snls_from_resource(json, url, identifier)
330-
structures.update(additional_structures)
331-
pbar.update(len(additional_structures))
323+
while next_link := json.get("links", {}).get("next"):
324+
if isinstance(next_link, dict) and "href" in next_link:
325+
next_link = next_link["href"]
326+
json = self._get_json(next_link)
327+
additional_structures = self._get_snls_from_resource(json, url, identifier)
328+
structures.update(additional_structures)
329+
pbar.update(len(additional_structures))
332330

333331
if structures:
334332
all_snls[identifier] = structures

pymatgen/io/abinit/pseudos.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,7 @@ def from_dict(cls, dct):
267267
new = cls.from_file(dct["filepath"])
268268

269269
# Consistency test based on md5
270-
if "md5" in dct and dct["md5"] != new.md5:
270+
if dct.get("md5") != new.md5:
271271
raise ValueError(
272272
f"The md5 found in file does not agree with the one in dict\nReceived {dct['md5']}\nComputed {new.md5}"
273273
)

pymatgen/io/cif.py

Lines changed: 33 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from io import StringIO
1414
from itertools import groupby
1515
from pathlib import Path
16+
from typing import Literal
1617

1718
import numpy as np
1819
from monty.io import zopen
@@ -885,8 +886,8 @@ def _parse_symbol(self, sym):
885886

886887
return parsed_sym
887888

888-
def _get_structure(self, data, primitive, symmetrized):
889-
"""Generate structure from part of the cif."""
889+
def _get_structure(self, data, primitive, symmetrized) -> Structure | None:
890+
"""Generate structure from part of the CIF."""
890891

891892
def get_num_implicit_hydrogens(sym):
892893
num_h = {"Wat": 2, "wat": 2, "O-H": 1}
@@ -907,26 +908,23 @@ def get_num_implicit_hydrogens(sym):
907908

908909
oxi_states = self.parse_oxi_states(data)
909910

910-
coord_to_species = {}
911+
coord_to_species = {} # type: ignore
911912
coord_to_magmoms = {}
912913
labels = {}
913914

914915
def get_matching_coord(coord):
915916
keys = list(coord_to_species)
916917
coords = np.array(keys)
917918
for op in self.symmetry_operations:
918-
c = op.operate(coord)
919-
inds = find_in_coord_list_pbc(coords, c, atol=self._site_tolerance)
920-
# can't use if inds, because python is dumb and np.array([0]) evaluates
921-
# to False
922-
if len(inds) > 0:
923-
return keys[inds[0]]
919+
frac_coord = op.operate(coord)
920+
indices = find_in_coord_list_pbc(coords, frac_coord, atol=self._site_tolerance)
921+
if len(indices) > 0:
922+
return keys[indices[0]]
924923
return False
925924

926925
for idx, label in enumerate(data["_atom_site_label"]):
927926
try:
928-
# If site type symbol exists, use it. Otherwise, we use the
929-
# label.
927+
# If site type symbol exists, use it. Otherwise, we use the label.
930928
symbol = self._parse_symbol(data["_atom_site_type_symbol"][idx])
931929
num_h = get_num_implicit_hydrogens(data["_atom_site_type_symbol"][idx])
932930
except KeyError:
@@ -946,7 +944,7 @@ def get_matching_coord(coord):
946944
except Exception:
947945
el = DummySpecies(symbol, o_s)
948946
else:
949-
el = get_el_sp(symbol)
947+
el = get_el_sp(symbol) # type: ignore
950948

951949
x = str2float(data["_atom_site_fract_x"][idx])
952950
y = str2float(data["_atom_site_fract_y"][idx])
@@ -963,7 +961,7 @@ def get_matching_coord(coord):
963961
match = get_matching_coord(coord)
964962
comp_d = {el: occu}
965963
if num_h > 0:
966-
comp_d["H"] = num_h
964+
comp_d["H"] = num_h # type: ignore
967965
self.warnings.append(
968966
"Structure has implicit hydrogens defined, "
969967
"parsed structure unlikely to be suitable for use "
@@ -1067,12 +1065,12 @@ def get_matching_coord(coord):
10671065
site_properties["magmom"] = all_magmoms
10681066

10691067
if len(site_properties) == 0:
1070-
site_properties = None
1068+
site_properties = None # type: ignore
10711069

10721070
if any(all_labels):
10731071
assert len(all_labels) == len(all_species)
10741072
else:
1075-
all_labels = None
1073+
all_labels = None # type: ignore
10761074

10771075
struct = Structure(lattice, all_species, all_coords, site_properties=site_properties, labels=all_labels)
10781076

@@ -1099,10 +1097,10 @@ def get_matching_coord(coord):
10991097
return struct
11001098
return None
11011099

1102-
def get_structures(self, primitive=True, symmetrized=False):
1103-
"""
1104-
Return list of structures in CIF file. primitive boolean sets whether a
1105-
conventional cell structure or primitive cell structure is returned.
1100+
def get_structures(
1101+
self, primitive: bool = True, symmetrized: bool = False, on_error: Literal["ignore", "warn", "raise"] = "warn"
1102+
) -> list[Structure]:
1103+
"""Return list of structures in CIF file.
11061104
11071105
Args:
11081106
primitive (bool): Set to False to return conventional unit cells.
@@ -1116,9 +1114,11 @@ def get_structures(self, primitive=True, symmetrized=False):
11161114
currently Wyckoff labels and space group labels or numbers are
11171115
not included in the generated SymmetrizedStructure, these will be
11181116
notated as "Not Parsed" or -1 respectively.
1117+
on_error ('ignore' | 'warn' | 'raise'): What to do in case of KeyError or ValueError
1118+
while parsing CIF file. Defaults to 'warn'.
11191119
11201120
Returns:
1121-
List of Structures.
1121+
list[Structure]: All structures in CIF file.
11221122
"""
11231123
if primitive and symmetrized:
11241124
raise ValueError(
@@ -1127,24 +1127,28 @@ def get_structures(self, primitive=True, symmetrized=False):
11271127
)
11281128

11291129
structures = []
1130-
for i, d in enumerate(self._cif.data.values()):
1130+
for idx, dct in enumerate(self._cif.data.values()):
11311131
try:
1132-
struct = self._get_structure(d, primitive, symmetrized)
1132+
struct = self._get_structure(dct, primitive, symmetrized)
11331133
if struct:
11341134
structures.append(struct)
11351135
except (KeyError, ValueError) as exc:
1136-
# Warn the user (Errors should never pass silently)
11371136
# A user reported a problem with cif files produced by Avogadro
11381137
# in which the atomic coordinates are in Cartesian coords.
1139-
self.warnings.append(str(exc))
1140-
warnings.warn(f"No structure parsed for {i + 1} structure in CIF. Section of CIF file below.")
1141-
warnings.warn(str(d))
1142-
warnings.warn(f"Error is {exc}.")
1138+
msg = f"No structure parsed for section {idx + 1} in CIF.\n{exc}"
1139+
if on_error == "raise":
1140+
raise ValueError(msg) from exc
1141+
if on_error == "warn":
1142+
warnings.warn(msg)
1143+
self.warnings.append(msg)
1144+
# continue silently if on_error == "ignore"
11431145

1144-
if self.warnings:
1146+
# if on_error == "raise" we don't get to here so no need to check
1147+
if self.warnings and on_error == "warn":
11451148
warnings.warn("Issues encountered while parsing CIF: " + "\n".join(self.warnings))
1149+
11461150
if len(structures) == 0:
1147-
raise ValueError("Invalid cif file with no structures!")
1151+
raise ValueError("Invalid CIF file with no structures!")
11481152
return structures
11491153

11501154
def get_bibtex_string(self):

pymatgen/io/lmto.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -289,13 +289,7 @@ def from_dict(cls, dct):
289289
dct["SPCGRP"], plat, species, positions, coords_are_cartesian=True
290290
)
291291
except ValueError:
292-
structure = Structure(
293-
plat,
294-
species,
295-
positions,
296-
coords_are_cartesian=True,
297-
to_unit_cell=True,
298-
)
292+
structure = Structure(plat, species, positions, coords_are_cartesian=True, to_unit_cell=True)
299293
else:
300294
structure = Structure(plat, species, positions, coords_are_cartesian=True, to_unit_cell=True)
301295

pymatgen/io/tests/test_cif.py

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,10 @@
2626

2727
class CifBlockTest(PymatgenTest):
2828
def test_to_string(self):
29-
with open(f"{self.TEST_FILES_DIR}/Graphite.cif") as f:
30-
s = f.read()
31-
c = CifBlock.from_str(s)
32-
cif_str_2 = str(CifBlock.from_str(str(c)))
29+
with open(f"{self.TEST_FILES_DIR}/Graphite.cif") as file:
30+
cif_str = file.read()
31+
cif_block = CifBlock.from_str(cif_str)
32+
cif_str_2 = str(CifBlock.from_str(str(cif_block)))
3333
cif_str = """data_53781-ICSD
3434
_database_code_ICSD 53781
3535
_audit_creation_date 2003-04-01
@@ -108,7 +108,7 @@ def test_to_string(self):
108108
_atom_site_attached_hydrogens
109109
C1 C0+ 2 b 0 0 0.25 . 1. 0
110110
C2 C0+ 2 c 0.3333 0.6667 0.25 . 1. 0"""
111-
for l1, l2, l3 in zip(str(c).split("\n"), cif_str.split("\n"), cif_str_2.split("\n")):
111+
for l1, l2, l3 in zip(str(cif_block).split("\n"), cif_str.split("\n"), cif_str_2.split("\n")):
112112
assert l1.strip() == l2.strip()
113113
assert l2.strip() == l3.strip()
114114

@@ -717,7 +717,7 @@ def test_no_coords_or_species(self):
717717
? ? ? ? ? ? ?
718718
"""
719719
parser = CifParser.from_str(string)
720-
with pytest.raises(ValueError, match="Invalid cif file with no structures"):
720+
with pytest.raises(ValueError, match="Invalid CIF file with no structures"):
721721
parser.get_structures()
722722

723723
def test_get_lattice_from_lattice_type(self):
@@ -792,11 +792,13 @@ def test_empty(self):
792792
assert cb == cb2
793793

794794
def test_bad_cif(self):
795-
f = f"{self.TEST_FILES_DIR}/bad_occu.cif"
796-
parser = CifParser(f)
797-
with pytest.raises(ValueError, match="Invalid cif file with no structures"):
798-
parser.get_structures()
799-
parser = CifParser(f, occupancy_tolerance=2)
795+
filepath = f"{self.TEST_FILES_DIR}/bad_occu.cif"
796+
parser = CifParser(filepath)
797+
with pytest.raises(
798+
ValueError, match="No structure parsed for section 1 in CIF.\nSpecies occupancies sum to more than 1!"
799+
):
800+
parser.get_structures(on_error="raise")
801+
parser = CifParser(filepath, occupancy_tolerance=2)
800802
struct = parser.get_structures()[0]
801803
assert struct[0].species["Al3+"] == approx(0.5)
802804

@@ -829,7 +831,7 @@ def test_replacing_finite_precision_frac_coords(self):
829831
)
830832

831833
def test_empty_deque(self):
832-
s = """data_1526655
834+
cif_str = """data_1526655
833835
_journal_name_full
834836
_space_group_IT_number 227
835837
_symmetry_space_group_name_Hall 'F 4d 2 3 -1d'
@@ -862,7 +864,7 @@ def test_empty_deque(self):
862864
3 -x,-y,-z
863865
4 x-1/2,-y-1/2,z-1/2
864866
;"""
865-
parser = CifParser.from_str(s)
867+
parser = CifParser.from_str(cif_str)
866868
assert parser.get_structures()[0].formula == "Si1"
867869
cif = """
868870
data_1526655
@@ -894,7 +896,7 @@ def test_empty_deque(self):
894896
Si1 Si 0 0 0 1 0.0
895897
"""
896898
parser = CifParser.from_str(cif)
897-
with pytest.raises(ValueError, match="Invalid cif file with no structures"):
899+
with pytest.raises(ValueError, match="Invalid CIF file with no structures"):
898900
parser.get_structures()
899901

900902

0 commit comments

Comments
 (0)