Skip to content

Commit 9e27ca8

Browse files
authored
Deprecate CifParser.get_structures() in favor of new parse_structures in which primitive defaults to False (#3419)
* breaking: change default primitive=True to False in CifParser.get_structures() * fix CifParser tests * add test to ensure CIfParser.get_structures() and Structure.from_file() remain consistent in the future * Structure.from_file add type hints and fix doc str claiming primitive keyword only passed to CIF * add UserWarning for change of default primitive=True to False made on 2023-10-24 * deprecate get_structures() and change primitive default to False in new parse_structures() method * use parse_structures in tests * fix TestCifTransmuter.test_init return self.parse_structures(*args, **kwargs) E TypeError: CifParser.parse_structures() got multiple values for argument 'primitive' * add comment suggested by @JaGeo * remove all internal use of deprecated CifParser.get_structures()
1 parent 1c18213 commit 9e27ca8

File tree

14 files changed

+160
-148
lines changed

14 files changed

+160
-148
lines changed

pymatgen/alchemy/materials.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,7 @@ def from_cif_str(
269269
extracted. Defaults to True. However, there are certain
270270
instances where you might want to use a non-primitive cell,
271271
e.g., if you are trying to generate all possible orderings of
272-
partial removals or order a disordered structure.
272+
partial removals or order a disordered structure. Defaults to True.
273273
occupancy_tolerance (float): If total occupancy of a site is
274274
between 1 and occupancy_tolerance, the occupancies will be
275275
scaled down to 1.
@@ -281,7 +281,7 @@ def from_cif_str(
281281
raw_string = re.sub(r"'", '"', cif_string)
282282
cif_dict = parser.as_dict()
283283
cif_keys = list(cif_dict)
284-
struct = parser.get_structures(primitive)[0]
284+
struct = parser.parse_structures(primitive=primitive)[0]
285285
partial_cif = cif_dict[cif_keys[0]]
286286
if "_database_code_ICSD" in partial_cif:
287287
source = partial_cif["_database_code_ICSD"] + "-ICSD"

pymatgen/analysis/chemenv/utils/scripts_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,7 @@ def compute_environments(chemenv_configuration):
240240
if not found:
241241
input_source = input("Enter path to CIF file : ")
242242
parser = CifParser(input_source)
243-
structure = parser.get_structures()[0]
243+
structure = parser.parse_structures(primitive=True)[0]
244244
elif source_type == "mp":
245245
if not found:
246246
input_source = input('Enter materials project id (e.g. "mp-1902") : ')

pymatgen/core/structure.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343

4444
if TYPE_CHECKING:
4545
from collections.abc import Iterable, Iterator, Sequence
46+
from pathlib import Path
4647

4748
from ase import Atoms
4849
from ase.calculators.calculator import Calculator
@@ -2767,7 +2768,7 @@ def from_str(
27672768
from pymatgen.io.cif import CifParser
27682769

27692770
parser = CifParser.from_str(input_string, **kwargs)
2770-
struct = parser.get_structures(primitive=primitive)[0]
2771+
struct = parser.parse_structures(primitive=primitive)[0]
27712772
elif fmt_low == "poscar":
27722773
from pymatgen.io.vasp import Poscar
27732774

@@ -2815,15 +2816,17 @@ def from_str(
28152816
return cls.from_sites(struct, properties=struct.properties)
28162817

28172818
@classmethod
2818-
def from_file(cls, filename, primitive=False, sort=False, merge_tol=0.0, **kwargs) -> Structure | IStructure:
2819+
def from_file(
2820+
cls, filename: str | Path, primitive: bool = False, sort: bool = False, merge_tol: float = 0.0, **kwargs
2821+
) -> Structure | IStructure:
28192822
"""Reads a structure from a file. For example, anything ending in
28202823
a "cif" is assumed to be a Crystallographic Information Format file.
28212824
Supported formats include CIF, POSCAR/CONTCAR, CHGCAR, LOCPOT,
28222825
vasprun.xml, CSSR, Netcdf and pymatgen's JSON-serialized structures.
28232826
28242827
Args:
28252828
filename (str): The filename to read from.
2826-
primitive (bool): Whether to convert to a primitive cell. Only available for CIFs. Defaults to False.
2829+
primitive (bool): Whether to convert to a primitive cell. Defaults to False.
28272830
sort (bool): Whether to sort sites. Default to False.
28282831
merge_tol (float): If this is some positive number, sites that are within merge_tol from each other will be
28292832
merged. Usually 0.01 should be enough to deal with common numerical issues.

pymatgen/io/cif.py

Lines changed: 40 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import textwrap
99
import warnings
1010
from collections import deque
11+
from datetime import datetime
1112
from functools import partial
1213
from inspect import getfullargspec as getargspec
1314
from io import StringIO
@@ -48,7 +49,7 @@ class CifBlock:
4849
attribute.
4950
"""
5051

51-
maxlen = 70 # not quite 80 so we can deal with semicolons and things
52+
max_len = 70 # not quite 80 so we can deal with semicolons and things
5253

5354
def __init__(self, data, loops, header):
5455
"""
@@ -89,7 +90,7 @@ def __str__(self):
8990
if key not in written:
9091
# k didn't belong to a loop
9192
v = self._format_field(self.data[key])
92-
if len(key) + len(v) + 3 < self.maxlen:
93+
if len(key) + len(v) + 3 < self.max_len:
9394
out.append(f"{key} {v}")
9495
else:
9596
out.extend([key, v])
@@ -105,7 +106,7 @@ def _loop_to_string(self, loop):
105106
if val[0] == ";":
106107
out += line + "\n" + val
107108
line = "\n"
108-
elif len(line) + len(val) + 2 < self.maxlen:
109+
elif len(line) + len(val) + 2 < self.max_len:
109110
line += " " + val
110111
else:
111112
out += line
@@ -115,8 +116,8 @@ def _loop_to_string(self, loop):
115116

116117
def _format_field(self, v):
117118
v = str(v).strip()
118-
if len(v) > self.maxlen:
119-
return ";\n" + textwrap.fill(v, self.maxlen) + "\n;"
119+
if len(v) > self.max_len:
120+
return ";\n" + textwrap.fill(v, self.max_len) + "\n;"
120121
# add quotes if necessary
121122
if v == "":
122123
return '""'
@@ -1132,18 +1133,35 @@ def get_matching_coord(coord):
11321133
return struct
11331134
return None
11341135

1135-
def get_structures(
1136+
@np.deprecate(
1137+
message="get_structures is deprecated and will be removed in 2024. Use parse_structures instead."
1138+
"The only difference is that primitive defaults to False in the new parse_structures method."
1139+
"So parse_structures(primitive=True) is equivalent to the old behavior of get_structures().",
1140+
)
1141+
def get_structures(self, *args, **kwargs) -> list[Structure]:
1142+
"""
1143+
Deprecated. Use parse_structures instead. Only difference between the two methods is the
1144+
default primitive=False in parse_structures.
1145+
So parse_structures(primitive=True) is equivalent to the old behavior of get_structures().
1146+
"""
1147+
if len(args) > 0: # extract primitive if passed as arg
1148+
kwargs["primitive"] = args[0]
1149+
args = args[1:]
1150+
kwargs.setdefault("primitive", True)
1151+
return self.parse_structures(*args, **kwargs)
1152+
1153+
def parse_structures(
11361154
self,
1137-
primitive: bool = True,
1155+
primitive: bool = False,
11381156
symmetrized: bool = False,
11391157
check_occu: bool = True,
11401158
on_error: Literal["ignore", "warn", "raise"] = "warn",
11411159
) -> list[Structure]:
11421160
"""Return list of structures in CIF file.
11431161
11441162
Args:
1145-
primitive (bool): Set to False to return conventional unit cells.
1146-
Defaults to True. With magnetic CIF files, will return primitive
1163+
primitive (bool): Set to True to return primitive unit cells.
1164+
Defaults to False. With magnetic CIF files, True will return primitive
11471165
magnetic cell which may be larger than nuclear primitive cell.
11481166
symmetrized (bool): If True, return a SymmetrizedStructure which will
11491167
include the equivalent indices and symmetry operations used to
@@ -1163,6 +1181,14 @@ def get_structures(
11631181
Returns:
11641182
list[Structure]: All structures in CIF file.
11651183
"""
1184+
if os.getenv("CI") and datetime.now() > datetime(2024, 3, 1): # March 2024 seems long enough # pragma: no cover
1185+
raise RuntimeError("remove the change of default primitive=True to False made on 2023-10-24")
1186+
warnings.warn(
1187+
"The default value of primitive was changed from True to False in "
1188+
"https://github.com/materialsproject/pymatgen/pull/3419. CifParser now returns the cell "
1189+
"in the CIF file as is. If you want the primitive cell, please set primitive=True explicitly.",
1190+
UserWarning,
1191+
)
11661192
if not check_occu: # added in https://github.com/materialsproject/pymatgen/pull/2836
11671193
warnings.warn("Structures with unphysical site occupancies are not compatible with many pymatgen features.")
11681194
if primitive and symmetrized:
@@ -1326,18 +1352,18 @@ def __init__(
13261352
# primitive to conventional structures, the standard for CIF.
13271353
struct = sf.get_refined_structure()
13281354

1329-
latt = struct.lattice
1355+
lattice = struct.lattice
13301356
comp = struct.composition
13311357
no_oxi_comp = comp.element_composition
13321358
block["_symmetry_space_group_name_H-M"] = spacegroup[0]
13331359
for cell_attr in ["a", "b", "c"]:
1334-
block["_cell_length_" + cell_attr] = format_str.format(getattr(latt, cell_attr))
1360+
block["_cell_length_" + cell_attr] = format_str.format(getattr(lattice, cell_attr))
13351361
for cell_attr in ["alpha", "beta", "gamma"]:
1336-
block["_cell_angle_" + cell_attr] = format_str.format(getattr(latt, cell_attr))
1362+
block["_cell_angle_" + cell_attr] = format_str.format(getattr(lattice, cell_attr))
13371363
block["_symmetry_Int_Tables_number"] = spacegroup[1]
13381364
block["_chemical_formula_structural"] = no_oxi_comp.reduced_formula
13391365
block["_chemical_formula_sum"] = no_oxi_comp.formula
1340-
block["_cell_volume"] = format_str.format(latt.volume)
1366+
block["_cell_volume"] = format_str.format(lattice.volume)
13411367

13421368
reduced_comp, fu = no_oxi_comp.get_reduced_composition_and_factor()
13431369
block["_cell_formula_units_Z"] = str(int(fu))
@@ -1403,7 +1429,7 @@ def __init__(
14031429

14041430
magmom = Magmom(mag)
14051431
if write_magmoms and abs(magmom) > 0:
1406-
moment = Magmom.get_moment_relative_to_crystal_axes(magmom, latt)
1432+
moment = Magmom.get_moment_relative_to_crystal_axes(magmom, lattice)
14071433
atom_site_moment_label.append(f"{sp.symbol}{count}")
14081434
atom_site_moment_crystalaxis_x.append(format_str.format(moment[0]))
14091435
atom_site_moment_crystalaxis_y.append(format_str.format(moment[1]))

pymatgen/io/feff/inputs.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -207,8 +207,8 @@ def from_cif_file(cif_file, source="", comment=""):
207207
Returns:
208208
Header Object
209209
"""
210-
r = CifParser(cif_file)
211-
structure = r.get_structures()[0]
210+
parser = CifParser(cif_file)
211+
structure = parser.parse_structures(primitive=True)[0]
212212
return Header(structure, source, comment)
213213

214214
@property

tests/analysis/magnetism/test_analyzer.py

Lines changed: 15 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
magnetic_deformation,
1616
)
1717
from pymatgen.core import Element, Lattice, Species, Structure
18-
from pymatgen.io.cif import CifParser
1918
from pymatgen.util.testing import TEST_FILES_DIR
2019

2120
enum_cmd = which("enum.x") or which("multienum.x")
@@ -25,45 +24,40 @@
2524

2625
class TestCollinearMagneticStructureAnalyzer(unittest.TestCase):
2726
def setUp(self):
28-
parser = CifParser(f"{TEST_FILES_DIR}/Fe.cif")
29-
self.Fe = parser.get_structures()[0]
27+
self.Fe = Structure.from_file(f"{TEST_FILES_DIR}/Fe.cif", primitive=True)
3028

31-
parser = CifParser(f"{TEST_FILES_DIR}/LiFePO4.cif")
32-
self.LiFePO4 = parser.get_structures()[0]
29+
self.LiFePO4 = Structure.from_file(f"{TEST_FILES_DIR}/LiFePO4.cif", primitive=True)
3330

34-
parser = CifParser(f"{TEST_FILES_DIR}/Fe3O4.cif")
35-
self.Fe3O4 = parser.get_structures()[0]
31+
self.Fe3O4 = Structure.from_file(f"{TEST_FILES_DIR}/Fe3O4.cif", primitive=True)
3632

37-
parser = CifParser(f"{TEST_FILES_DIR}/magnetic.ncl.example.GdB4.mcif")
38-
self.GdB4 = parser.get_structures()[0]
33+
self.GdB4 = Structure.from_file(f"{TEST_FILES_DIR}/magnetic.ncl.example.GdB4.mcif", primitive=True)
3934

40-
parser = CifParser(f"{TEST_FILES_DIR}/magnetic.example.NiO.mcif")
41-
self.NiO_expt = parser.get_structures()[0]
35+
self.NiO_expt = Structure.from_file(f"{TEST_FILES_DIR}/magnetic.example.NiO.mcif", primitive=True)
4236

43-
latt = Lattice.cubic(4.17)
37+
lattice = Lattice.cubic(4.17)
4438
species = ["Ni", "O"]
4539
coords = [[0, 0, 0], [0.5, 0.5, 0.5]]
46-
self.NiO = Structure.from_spacegroup(225, latt, species, coords)
40+
self.NiO = Structure.from_spacegroup(225, lattice, species, coords)
4741

48-
latt = Lattice([[2.085, 2.085, 0.0], [0.0, -2.085, -2.085], [-2.085, 2.085, -4.17]])
42+
lattice = Lattice([[2.085, 2.085, 0.0], [0.0, -2.085, -2.085], [-2.085, 2.085, -4.17]])
4943
species = ["Ni", "Ni", "O", "O"]
5044
coords = [[0.5, 0, 0.5], [0, 0, 0], [0.25, 0.5, 0.25], [0.75, 0.5, 0.75]]
51-
self.NiO_AFM_111 = Structure(latt, species, coords, site_properties={"magmom": [-5, 5, 0, 0]})
45+
self.NiO_AFM_111 = Structure(lattice, species, coords, site_properties={"magmom": [-5, 5, 0, 0]})
5246

53-
latt = Lattice([[2.085, 2.085, 0], [0, 0, -4.17], [-2.085, 2.085, 0]])
47+
lattice = Lattice([[2.085, 2.085, 0], [0, 0, -4.17], [-2.085, 2.085, 0]])
5448
species = ["Ni", "Ni", "O", "O"]
5549
coords = [[0.5, 0.5, 0.5], [0, 0, 0], [0, 0.5, 0], [0.5, 0, 0.5]]
56-
self.NiO_AFM_001 = Structure(latt, species, coords, site_properties={"magmom": [-5, 5, 0, 0]})
50+
self.NiO_AFM_001 = Structure(lattice, species, coords, site_properties={"magmom": [-5, 5, 0, 0]})
5751

58-
latt = Lattice([[2.085, 2.085, 0], [0, 0, -4.17], [-2.085, 2.085, 0]])
52+
lattice = Lattice([[2.085, 2.085, 0], [0, 0, -4.17], [-2.085, 2.085, 0]])
5953
species = ["Ni", "Ni", "O", "O"]
6054
coords = [[0.5, 0.5, 0.5], [0, 0, 0], [0, 0.5, 0], [0.5, 0, 0.5]]
61-
self.NiO_AFM_001_opposite = Structure(latt, species, coords, site_properties={"magmom": [5, -5, 0, 0]})
55+
self.NiO_AFM_001_opposite = Structure(lattice, species, coords, site_properties={"magmom": [5, -5, 0, 0]})
6256

63-
latt = Lattice([[2.085, 2.085, 0], [0, 0, -4.17], [-2.085, 2.085, 0]])
57+
lattice = Lattice([[2.085, 2.085, 0], [0, 0, -4.17], [-2.085, 2.085, 0]])
6458
species = ["Ni", "Ni", "O", "O"]
6559
coords = [[0.5, 0.5, 0.5], [0, 0, 0], [0, 0.5, 0], [0.5, 0, 0.5]]
66-
self.NiO_unphysical = Structure(latt, species, coords, site_properties={"magmom": [-3, 0, 0, 0]})
60+
self.NiO_unphysical = Structure(lattice, species, coords, site_properties={"magmom": [-3, 0, 0, 0]})
6761

6862
def test_get_representations(self):
6963
# tests to convert between storing magnetic moment information

tests/analysis/magnetism/test_jahnteller.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from pytest import approx
77

88
from pymatgen.analysis.magnetism.jahnteller import JahnTellerAnalyzer, Species
9-
from pymatgen.io.cif import CifParser
9+
from pymatgen.core import Structure
1010
from pymatgen.util.testing import TEST_FILES_DIR
1111

1212

@@ -82,11 +82,9 @@ def test_jahn_teller_species_analysis(self):
8282
assert m == "none"
8383

8484
def test_jahn_teller_structure_analysis(self):
85-
parser = CifParser(f"{TEST_FILES_DIR}/LiFePO4.cif")
86-
LiFePO4 = parser.get_structures()[0]
85+
LiFePO4 = Structure.from_file(f"{TEST_FILES_DIR}/LiFePO4.cif", primitive=True)
8786

88-
parser = CifParser(f"{TEST_FILES_DIR}/Fe3O4.cif")
89-
Fe3O4 = parser.get_structures()[0]
87+
Fe3O4 = Structure.from_file(f"{TEST_FILES_DIR}/Fe3O4.cif", primitive=True)
9088

9189
assert self.jt.is_jahn_teller_active(LiFePO4)
9290
assert self.jt.is_jahn_teller_active(Fe3O4)

tests/core/test_structure.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
)
2727
from pymatgen.electronic_structure.core import Magmom
2828
from pymatgen.io.ase import AseAtomsAdaptor
29+
from pymatgen.io.cif import CifParser
2930
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
3031
from pymatgen.util.testing import TEST_FILES_DIR, PymatgenTest
3132

@@ -778,6 +779,10 @@ def test_to_from_file_string(self):
778779
struct = Structure.from_file(f"{TEST_FILES_DIR}/bad-unicode-gh-2947.mcif")
779780
assert struct.formula == "Ni32 O32"
780781

782+
# make sure CIfParser.parse_structures() and Structure.from_file() are consistent
783+
# i.e. uses same merge_tol for site merging, same primitive=False, etc.
784+
assert struct == CifParser(f"{TEST_FILES_DIR}/bad-unicode-gh-2947.mcif").parse_structures()[0]
785+
781786
def test_to_file_alias(self):
782787
out_path = f"{self.tmp_path}/POSCAR"
783788
assert self.struct.to(out_path) == self.struct.to_file(out_path)

tests/ext/test_matproj.py

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
from pymatgen.entries.computed_entries import ComputedEntry
2222
from pymatgen.ext.matproj import MP_LOG_FILE, MPRestError, _MPResterBasic
2323
from pymatgen.ext.matproj_legacy import TaskType, _MPResterLegacy
24-
from pymatgen.io.cif import CifParser
2524
from pymatgen.phonon.bandstructure import PhononBandStructureSymmLine
2625
from pymatgen.phonon.dos import CompletePhononDos
2726
from pymatgen.util.testing import TEST_FILES_DIR, PymatgenTest
@@ -127,8 +126,8 @@ def test_find_structure(self):
127126
cif_file = f"{TEST_FILES_DIR}/Fe3O4.cif"
128127
data = mpr.find_structure(str(cif_file))
129128
assert len(data) > 1
130-
s = CifParser(cif_file).get_structures()[0]
131-
data = mpr.find_structure(s)
129+
struct = Structure.from_file(cif_file, primitive=True)
130+
data = mpr.find_structure(struct)
132131
assert len(data) > 1
133132

134133
def test_get_entries_in_chemsys(self):
@@ -622,15 +621,6 @@ def test_get_all_materials_ids_doc(self):
622621
# data = mpr.get_materials_id_references("mp-123")
623622
# assert len(data) > 1000
624623

625-
# def test_find_structure(self):
626-
# mpr = _MPResterLegacy()
627-
# cif_file = f"{TEST_FILES_DIR}/Fe3O4.cif"
628-
# data = mpr.find_structure(str(cif_file))
629-
# assert len(data) > 1
630-
# s = CifParser(cif_file).get_structures()[0]
631-
# data = mpr.find_structure(s)
632-
# assert len(data) > 1
633-
634624
def test_get_entries_and_in_chemsys(self):
635625
# One large system test.
636626
syms = ["Li", "Fe", "O", "P", "Mn"]

tests/io/feff/test_inputs.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
from pytest import approx
88

99
from pymatgen.core import Molecule, Structure
10-
from pymatgen.io.cif import CifParser
1110
from pymatgen.io.feff.inputs import Atoms, Header, Paths, Potential, Tags
1211
from pymatgen.util.testing import TEST_FILES_DIR
1312

@@ -59,8 +58,7 @@ def test_as_dict_and_from_dict(self):
5958
class TestFeffAtoms(unittest.TestCase):
6059
@classmethod
6160
def setUpClass(cls):
62-
r = CifParser(f"{TEST_FILES_DIR}/CoO19128.cif")
63-
cls.structure = r.get_structures()[0]
61+
cls.structure = Structure.from_file(f"{TEST_FILES_DIR}/CoO19128.cif")
6462
cls.atoms = Atoms(cls.structure, "O", 12.0)
6563

6664
def test_absorbing_atom(self):

0 commit comments

Comments
 (0)