Skip to content

Commit f52c921

Browse files
stefsmeetsjanosh
andauthored
Fix for writing non-unique site labels in CifWriter (#3767)
* Add check for non-unique labels in CifWriter * Append _$NUM if label already exists * Test that labels work as intended * Remove redundant check * Update tests/io/test_cif.py Co-authored-by: Janosh Riebesell <[email protected]> Signed-off-by: Stef Smeets <[email protected]> * Add warning for duplicate sites and method for relabeling * Add test for relabeling * Work around groupby caveat * return self from SiteColl.relabel_sites(), advise to use SiteColl.copy() to avoid in-place mod in doc string * Add ignore_uniq parameter to relabel_sites method in SiteCollection class * improve coverage in TestStructure.test_relabel_sites * assert unique labels do not warn in test_cif_writer_non_unique_labels --------- Signed-off-by: Stef Smeets <[email protected]> Co-authored-by: Janosh Riebesell <[email protected]>
1 parent f8fd454 commit f52c921

File tree

4 files changed

+76
-0
lines changed

4 files changed

+76
-0
lines changed

pymatgen/core/structure.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,32 @@ def labels(self) -> list[str]:
335335
"""Site labels as a list."""
336336
return [site.label for site in self]
337337

338+
def relabel_sites(self, ignore_uniq: bool = False) -> Self:
339+
"""Relabel sites to ensure they are unique.
340+
341+
Site labels are updated in-place, and relabeled by suffixing _1, _2, ..., _n for duplicates.
342+
Call Structure.copy().relabel_sites() to avoid modifying the original structure.
343+
344+
Args:
345+
ignore_uniq (bool): If True, do not relabel sites that already have unique labels.
346+
Defaults to False.
347+
348+
Returns:
349+
SiteCollection: self with relabeled sites.
350+
"""
351+
grouped = collections.defaultdict(list)
352+
for site in self:
353+
grouped[site.label].append(site)
354+
355+
for label, sites in grouped.items():
356+
if len(sites) == 0 or (len(sites) == 1 and ignore_uniq):
357+
continue
358+
359+
for idx, site in enumerate(sites):
360+
site.label = f"{label}_{idx + 1}"
361+
362+
return self
363+
338364
def __contains__(self, site: object) -> bool:
339365
return site in self.sites
340366

pymatgen/io/cif.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1568,6 +1568,14 @@ def __init__(
15681568
atom_site_occupancy.append(str(occu))
15691569
count += 1
15701570

1571+
if len(set(atom_site_label)) != len(atom_site_label):
1572+
warnings.warn(
1573+
"Site labels are not unique, which is not compliant with the CIF spec "
1574+
"(https://www.iucr.org/__data/iucr/cifdic_html/1/cif_core.dic/Iatom_site_label.html):"
1575+
f"`{atom_site_label}`.",
1576+
UserWarning,
1577+
)
1578+
15711579
block["_atom_site_type_symbol"] = atom_site_type_symbol
15721580
block["_atom_site_label"] = atom_site_label
15731581
block["_atom_site_symmetry_multiplicity"] = atom_site_symmetry_multiplicity

tests/core/test_structure.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1005,6 +1005,25 @@ def test_replace_species_labels(self):
10051005
new2 = struct.replace_species({"Si": replacement}, in_place=False)
10061006
assert new2.labels == [label] * len(struct)
10071007

1008+
def test_relabel_sites(self):
1009+
struct = self.struct.copy()
1010+
assert self.struct.labels == ["Si", "Si"]
1011+
relabeled = struct.relabel_sites()
1012+
assert relabeled is struct
1013+
assert relabeled.labels == struct.labels == ["Si_1", "Si_2"]
1014+
1015+
struct.replace_species({"Si": "Li"})
1016+
assert struct.labels == ["Li", "Li"]
1017+
struct.relabel_sites()
1018+
assert struct.labels == ["Li_1", "Li_2"]
1019+
1020+
li_si = self.struct.copy().replace(0, "Li")
1021+
assert li_si.labels == ["Li", "Si"]
1022+
li_si.relabel_sites(ignore_uniq=True) # check no-op for unique labels
1023+
assert li_si.labels == ["Li", "Si"]
1024+
li_si.relabel_sites() # check no-op for unique labels
1025+
assert li_si.labels == ["Li_1", "Si_1"]
1026+
10081027
def test_append_insert_remove_replace_substitute(self):
10091028
struct = self.struct
10101029
struct.insert(1, "O", [0.5, 0.5, 0.5])

tests/io/test_cif.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1084,3 +1084,26 @@ def test_bibtex(self):
10841084
}
10851085
"""
10861086
assert self.mcif_ncl.get_bibtex_string() == ref_bibtex_string
1087+
1088+
1089+
def test_cif_writer_non_unique_labels(capsys):
1090+
# https://github.com/materialsproject/pymatgen/issues/3761
1091+
parser = CifParser(f"{TEST_FILES_DIR}/cif/garnet.cif")
1092+
struct = parser.parse_structures()[0]
1093+
1094+
assert struct.labels[0:3] == ["Ca1", "Ca1", "Ca1"]
1095+
assert len(set(struct.labels)) != len(struct.labels)
1096+
1097+
# This should raise a warning
1098+
with pytest.warns(UserWarning, match="Site labels are not unique, which is not compliant with the CIF spec"):
1099+
CifWriter(struct)
1100+
1101+
struct.relabel_sites()
1102+
assert struct.labels[0:3] == ["Ca1_1", "Ca1_2", "Ca1_3"]
1103+
1104+
_ = capsys.readouterr()
1105+
# This should not raise a warning
1106+
CifWriter(struct)
1107+
stdout, stderr = capsys.readouterr()
1108+
assert stdout == ""
1109+
assert stderr == ""

0 commit comments

Comments
 (0)