Skip to content

Commit 0dca5b1

Browse files
jonathanjdenneypre-commit-ci[bot]janosh
authored
Add keyword check_occu: bool = True to CifParser.get_structures() (#2836)
* Add files via upload * Add files via upload * Add files via upload * Add files via upload * Add files via upload * Add files via upload * Add files via upload * Add files via upload * Add files via upload * Add files via upload * Add files via upload * Add files via upload * merging edits * Update test_fstar.py * Addressed comments from Matt * Update test_fstar.py * Add skip_checks to CifParser * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add tests to skip_checks add tests to skip_checks * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update test_skip_checks.py * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update test_skip_checks.py * make recommended changes * pre-commit auto-fixes * remove Unnecessary `else` after `return` statement * pre-commit auto-fixes * Update cif.py * pre-commit auto-fixes * removed folder test skip checks this folder must have gotten restored at some point. It has been deleted again. * Made recommended changes Shortened some if statements to contain less duplicates. Moved the skip_checks test to the right place. * pre-commit auto-fixes * Update test_cif.py * Update test_cif.py * deleted this version of fstar * pre-commit auto-fixes * pre-commit auto-fixes * Update cif.py * pre-commit auto-fixes * Create Skip_checks_test.cif * made requested edits * pre-commit auto-fixes * Update cif.py * add doc str Warning: Aphysical site occupancies are incompatible with many pymatgen features. * fix test_skip_checks and improve var names * Update cif.py * pre-commit auto-fixes * fix errors * pre-commit auto-fixes * Update test_cif.py * Update cif.py * Update cif.py * Update cif.py * Update cif.py * not sure why comp_dict construction was so complicated? * ensure test_skip_checks raises ValueError without occupancy_tolerance=1.5 * Update cif.py * pre-commit auto-fixes * remove code duplication in repeated if skip_occu_checks blocks * fix typos * fix mypy * reduce git diff * rename skip_occu_checks to check_occu --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Janosh Riebesell <[email protected]>
1 parent 0d5eed0 commit 0dca5b1

File tree

3 files changed

+67
-32
lines changed

3 files changed

+67
-32
lines changed

pymatgen/apps/battery/plotter.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ def get_plot(self, width=8, height=8, term_zero=True, ax: plt.Axes = None):
108108
ax.plot(x, y, "-", linewidth=2, label=label)
109109

110110
ax.legend()
111-
ax.set_xlabel(self._choose_best_x_lable(formula=formula, wion_symbol=wion_symbol))
111+
ax.set_xlabel(self._choose_best_x_label(formula=formula, wion_symbol=wion_symbol))
112112
ax.set_ylabel("Voltage (V)")
113113
plt.tight_layout()
114114
return ax
@@ -157,7 +157,7 @@ def get_plotly_figure(
157157
width=width,
158158
height=height,
159159
font=font_dict,
160-
xaxis={"title": self._choose_best_x_lable(formula=formula, wion_symbol=wion_symbol)},
160+
xaxis={"title": self._choose_best_x_label(formula=formula, wion_symbol=wion_symbol)},
161161
yaxis={"title": "Voltage (V)"},
162162
**kwargs,
163163
),
@@ -166,7 +166,7 @@ def get_plotly_figure(
166166
fig.update_layout(template="plotly_white", title_x=0.5)
167167
return fig
168168

169-
def _choose_best_x_lable(self, formula, wion_symbol):
169+
def _choose_best_x_label(self, formula, wion_symbol):
170170
if self.xaxis in {"capacity", "capacity_grav"}:
171171
return "Capacity (mAh/g)"
172172
if self.xaxis == "capacity_vol":

pymatgen/io/cif.py

Lines changed: 43 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from io import StringIO
1414
from itertools import groupby
1515
from pathlib import Path
16-
from typing import TYPE_CHECKING, Literal
16+
from typing import TYPE_CHECKING, Any, Literal
1717

1818
import numpy as np
1919
from monty.io import zopen
@@ -24,6 +24,7 @@
2424
from pymatgen.core.lattice import Lattice
2525
from pymatgen.core.operations import MagSymmOp, SymmOp
2626
from pymatgen.core.periodic_table import DummySpecies, Element, Species, get_el_sp
27+
from pymatgen.core.sites import PeriodicSite
2728
from pymatgen.core.structure import Structure
2829
from pymatgen.electronic_structure.core import Magmom
2930
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer, SpacegroupOperations
@@ -629,9 +630,9 @@ def get_lattice(
629630

630631
except KeyError:
631632
# Missing Key search for cell setting
632-
for lattice_lable in ["_symmetry_cell_setting", "_space_group_crystal_system"]:
633-
if data.data.get(lattice_lable):
634-
lattice_type = data.data.get(lattice_lable).lower()
633+
for lattice_label in ["_symmetry_cell_setting", "_space_group_crystal_system"]:
634+
if data.data.get(lattice_label):
635+
lattice_type = data.data.get(lattice_label).lower()
635636
try:
636637
required_args = getargspec(getattr(Lattice, lattice_type)).args
637638

@@ -910,8 +911,10 @@ def _parse_symbol(self, sym):
910911

911912
return parsed_sym
912913

913-
def _get_structure(self, data, primitive, symmetrized) -> Structure | None:
914-
"""Generate structure from part of the CIF."""
914+
def _get_structure(
915+
self, data: dict[str, Any], primitive: bool, symmetrized: bool, check_occu: bool = False
916+
) -> Structure | None:
917+
"""Generate structure from part of the cif."""
915918

916919
def get_num_implicit_hydrogens(sym):
917920
num_h = {"Wat": 2, "wat": 2, "O-H": 1}
@@ -960,7 +963,7 @@ def get_matching_coord(coord):
960963
if oxi_states is not None:
961964
o_s = oxi_states.get(symbol, 0)
962965
# use _atom_site_type_symbol if possible for oxidation state
963-
if "_atom_site_type_symbol" in data.data:
966+
if "_atom_site_type_symbol" in data.data: # type: ignore[attr-defined]
964967
oxi_symbol = data["_atom_site_type_symbol"][idx]
965968
o_s = oxi_states.get(oxi_symbol, o_s)
966969
try:
@@ -979,19 +982,19 @@ def get_matching_coord(coord):
979982
occu = str2float(data["_atom_site_occupancy"][idx])
980983
except (KeyError, ValueError):
981984
occu = 1
982-
983-
if occu > 0:
985+
# If check_occu is True or the occupancy is greater than 0, create comp_d
986+
if check_occu or occu > 0:
984987
coord = (x, y, z)
985988
match = get_matching_coord(coord)
986-
comp_d = {el: occu}
989+
comp_dict = {el: max(occu, 1e-8)}
990+
987991
if num_h > 0:
988-
comp_d["H"] = num_h # type: ignore
992+
comp_dict["H"] = num_h # type: ignore
989993
self.warnings.append(
990-
"Structure has implicit hydrogens defined, "
991-
"parsed structure unlikely to be suitable for use "
992-
"in calculations unless hydrogens added."
994+
"Structure has implicit hydrogens defined, parsed structure unlikely to be "
995+
"suitable for use in calculations unless hydrogens added."
993996
)
994-
comp = Composition(comp_d)
997+
comp = Composition(comp_dict)
995998

996999
if not match:
9971000
coord_to_species[coord] = comp
@@ -1002,7 +1005,6 @@ def get_matching_coord(coord):
10021005
# disordered magnetic not currently supported
10031006
coord_to_magmoms[match] = None
10041007
labels[match] = label
1005-
10061008
sum_occu = [
10071009
sum(c.values()) for c in coord_to_species.values() if set(c.elements) != {Element("O"), Element("H")}
10081010
]
@@ -1060,7 +1062,7 @@ def get_matching_coord(coord):
10601062

10611063
# The following might be a more natural representation of equivalent indices,
10621064
# but is not in the format expect by SymmetrizedStructure:
1063-
# equivalent_indices.append(list(range(len(allcoords), len(coords)+len(allcoords))))
1065+
# equivalent_indices.append(list(range(len(all_coords), len(coords)+len(all_coords))))
10641066
# The above gives a list like:
10651067
# [[0, 1, 2, 3], [4, 5, 6, 7, 8, 9, 10, 11]] where the
10661068
# integers are site indices, whereas the version used below will give a version like:
@@ -1076,6 +1078,7 @@ def get_matching_coord(coord):
10761078
all_labels.extend(new_labels)
10771079

10781080
# rescale occupancies if necessary
1081+
all_species_noedit = all_species[:] # save copy before scaling in case of check_occu=True, used below
10791082
for idx, species in enumerate(all_species):
10801083
total_occu = sum(species.values())
10811084
if 1 < total_occu <= self._occupancy_tolerance:
@@ -1105,12 +1108,21 @@ def get_matching_coord(coord):
11051108
# TODO: extract Wyckoff labels (or other CIF attributes) and include as site_properties
11061109
wyckoffs = ["Not Parsed"] * len(struct)
11071110

1108-
# Names of space groups are likewise not parsed (again, not all CIFs will contain this information)
1111+
# space groups names are likewise not parsed (again, not all CIFs will contain this information)
11091112
# What is stored are the lists of symmetry operations used to generate the structure
11101113
# TODO: ensure space group labels are stored if present
11111114
sg = SpacegroupOperations("Not Parsed", -1, self.symmetry_operations)
1115+
struct = SymmetrizedStructure(struct, sg, equivalent_indices, wyckoffs)
11121116

1113-
return SymmetrizedStructure(struct, sg, equivalent_indices, wyckoffs)
1117+
if check_occu:
1118+
struct = Structure(lattice, all_species, all_coords, site_properties=site_properties, labels=all_labels)
1119+
for idx in range(len(struct)):
1120+
struct[idx] = PeriodicSite(
1121+
all_species_noedit[idx], all_coords[idx], lattice, properties=site_properties, skip_checks=True
1122+
)
1123+
1124+
if symmetrized or check_occu:
1125+
return struct
11141126

11151127
struct = struct.get_sorted_structure()
11161128

@@ -1124,7 +1136,11 @@ def get_matching_coord(coord):
11241136
return None
11251137

11261138
def get_structures(
1127-
self, primitive: bool = True, symmetrized: bool = False, on_error: Literal["ignore", "warn", "raise"] = "warn"
1139+
self,
1140+
primitive: bool = True,
1141+
symmetrized: bool = False,
1142+
check_occu: bool = True,
1143+
on_error: Literal["ignore", "warn", "raise"] = "warn",
11281144
) -> list[Structure]:
11291145
"""Return list of structures in CIF file.
11301146
@@ -1140,12 +1156,18 @@ def get_structures(
11401156
currently Wyckoff labels and space group labels or numbers are
11411157
not included in the generated SymmetrizedStructure, these will be
11421158
notated as "Not Parsed" or -1 respectively.
1159+
check_occu (bool): If False, site occupancy will not be checked, allowing unphysical
1160+
occupancy != 1. Useful for experimental results in which occupancy was allowed
1161+
to refine to unphysical values. Warning: unphysical site occupancies are incompatible
1162+
with many pymatgen features. Defaults to True.
11431163
on_error ('ignore' | 'warn' | 'raise'): What to do in case of KeyError or ValueError
11441164
while parsing CIF file. Defaults to 'warn'.
11451165
11461166
Returns:
11471167
list[Structure]: All structures in CIF file.
11481168
"""
1169+
if check_occu: # added in https://github.com/materialsproject/pymatgen/pull/2836
1170+
warnings.warn("Structures with unphysical site occupancies are not compatible with many pymatgen features.")
11491171
if primitive and symmetrized:
11501172
raise ValueError(
11511173
"Using both 'primitive' and 'symmetrized' arguments is not currently supported "
@@ -1155,7 +1177,7 @@ def get_structures(
11551177
structures = []
11561178
for idx, dct in enumerate(self._cif.data.values()):
11571179
try:
1158-
struct = self._get_structure(dct, primitive, symmetrized)
1180+
struct = self._get_structure(dct, primitive, symmetrized, check_occu=check_occu)
11591181
if struct:
11601182
structures.append(struct)
11611183
except (KeyError, ValueError) as exc:

tests/io/test_cif.py

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -914,33 +914,46 @@ def test_empty_deque(self):
914914
with pytest.raises(ValueError, match="Invalid CIF file with no structures"):
915915
parser.get_structures()
916916

917+
def test_skip_checks(self):
918+
with open(f"{TEST_FILES_DIR}/site_type_symbol_test.cif") as c:
919+
cif_str = c.read()
920+
cif_str = cif_str.replace("Te Te 1.0000", "Te Te 1.5000", 1)
921+
922+
with pytest.raises(ValueError, match="Invalid CIF file with no structures"):
923+
# should fail without setting custom occupancy tolerance
924+
CifParser.from_string(cif_str).get_structures()
925+
926+
parser = CifParser.from_string(cif_str, occupancy_tolerance=1.5)
927+
structs = parser.get_structures(primitive=False, symmetrized=True, check_occu=False)[0]
928+
assert structs[0].species.as_dict()["Te"] == 1.5
929+
917930

918931
class TestMagCif(PymatgenTest):
919932
def setUp(self):
920933
self.mcif = CifParser(f"{TEST_FILES_DIR}/magnetic.example.NiO.mcif")
921934
self.mcif_ncl = CifParser(f"{TEST_FILES_DIR}/magnetic.ncl.example.GdB4.mcif")
922-
self.mcif_incom = CifParser(f"{TEST_FILES_DIR}/magnetic.incommensurate.example.Cr.mcif")
923-
self.mcif_disord = CifParser(f"{TEST_FILES_DIR}/magnetic.disordered.example.CuMnO2.mcif")
935+
self.mcif_incommensurate = CifParser(f"{TEST_FILES_DIR}/magnetic.incommensurate.example.Cr.mcif")
936+
self.mcif_disordered = CifParser(f"{TEST_FILES_DIR}/magnetic.disordered.example.CuMnO2.mcif")
924937
self.mcif_ncl2 = CifParser(f"{TEST_FILES_DIR}/Mn3Ge_IR2.mcif")
925938

926939
def test_mcif_detection(self):
927940
assert self.mcif.feature_flags["magcif"]
928941
assert self.mcif_ncl.feature_flags["magcif"]
929-
assert self.mcif_incom.feature_flags["magcif"]
930-
assert self.mcif_disord.feature_flags["magcif"]
942+
assert self.mcif_incommensurate.feature_flags["magcif"]
943+
assert self.mcif_disordered.feature_flags["magcif"]
931944
assert not self.mcif.feature_flags["magcif_incommensurate"]
932945
assert not self.mcif_ncl.feature_flags["magcif_incommensurate"]
933-
assert self.mcif_incom.feature_flags["magcif_incommensurate"]
934-
assert not self.mcif_disord.feature_flags["magcif_incommensurate"]
946+
assert self.mcif_incommensurate.feature_flags["magcif_incommensurate"]
947+
assert not self.mcif_disordered.feature_flags["magcif_incommensurate"]
935948

936949
def test_get_structures(self):
937950
# incommensurate structures not currently supported
938951
with pytest.raises(NotImplementedError, match="Incommensurate structures not currently supported"):
939-
self.mcif_incom.get_structures()
952+
self.mcif_incommensurate.get_structures()
940953

941954
# disordered magnetic structures not currently supported
942955
with pytest.raises(NotImplementedError, match="Disordered magnetic structures not currently supported"):
943-
self.mcif_disord.get_structures()
956+
self.mcif_disordered.get_structures()
944957

945958
# taken from self.mcif_ncl, removing explicit magnetic symmops
946959
# so that MagneticSymmetryGroup() has to be invoked

0 commit comments

Comments
 (0)