Skip to content

Commit e6cc11c

Browse files
committed
rename cif_assessor_tol to comp_tol
1 parent b76ebb8 commit e6cc11c

File tree

1 file changed

+37
-34
lines changed

1 file changed

+37
-34
lines changed

pymatgen/io/cif.py

Lines changed: 37 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,7 @@ def __init__(self, data, loops, header):
6161
"""
6262
self.loops = loops
6363
self.data = data
64-
# AJ (@computron) says: CIF Block names cannot be more than 75 characters or you
65-
# get an Exception
64+
# AJ (@computron) says: CIF Block names cannot be more than 75 characters or you get an Exception
6665
self.header = header[:74]
6766

6867
def __eq__(self, other: object) -> bool:
@@ -73,7 +72,7 @@ def __eq__(self, other: object) -> bool:
7372
def __getitem__(self, key):
7473
return self.data[key]
7574

76-
def __str__(self):
75+
def __str__(self) -> str:
7776
"""Returns the cif string for the data block."""
7877
out = [f"data_{self.header}"]
7978
keys = list(self.data)
@@ -114,17 +113,21 @@ def _loop_to_str(self, loop):
114113
out += line
115114
return out
116115

117-
def _format_field(self, v):
118-
v = str(v).strip()
119-
if len(v) > self.max_len:
120-
return ";\n" + textwrap.fill(v, self.max_len) + "\n;"
116+
def _format_field(self, val) -> str:
117+
val = str(val).strip()
118+
if len(val) > self.max_len:
119+
return ";\n" + textwrap.fill(val, self.max_len) + "\n;"
121120
# add quotes if necessary
122-
if v == "":
121+
if val == "":
123122
return '""'
124-
if (" " in v or v[0] == "_") and not (v[0] == "'" and v[-1] == "'") and not (v[0] == '"' and v[-1] == '"'):
125-
q = '"' if "'" in v else "'"
126-
v = q + v + q
127-
return v
123+
if (
124+
(" " in val or val[0] == "_")
125+
and not (val[0] == "'" and val[-1] == "'")
126+
and not (val[0] == '"' and val[-1] == '"')
127+
):
128+
quote = '"' if "'" in val else "'"
129+
val = quote + val + quote
130+
return val
128131

129132
@classmethod
130133
def _process_string(cls, string):
@@ -137,19 +140,18 @@ def _process_string(cls, string):
137140
# since line breaks in .cif files are mostly meaningless,
138141
# break up into a stream of tokens to parse, rejoining multiline
139142
# strings (between semicolons)
140-
q = deque()
143+
deq = deque()
141144
multiline = False
142145
ml = []
143-
# this regex splits on spaces, except when in quotes.
144-
# starting quotes must not be preceded by non-whitespace
145-
# (these get eaten by the first expression)
146-
# ending quotes must not be followed by non-whitespace
147-
p = re.compile(r"""([^'"\s][\S]*)|'(.*?)'(?!\S)|"(.*?)"(?!\S)""")
146+
# this regex splits on spaces, except when in quotes. starting quotes must not be
147+
# preceded by non-whitespace (these get eaten by the first expression). ending
148+
# quotes must not be followed by non-whitespace
149+
pattern = re.compile(r"""([^'"\s][\S]*)|'(.*?)'(?!\S)|"(.*?)"(?!\S)""")
148150
for line in string.splitlines():
149151
if multiline:
150152
if line.startswith(";"):
151153
multiline = False
152-
q.append(("", "", "", " ".join(ml)))
154+
deq.append(("", "", "", " ".join(ml)))
153155
ml = []
154156
line = line[1:].strip()
155157
else:
@@ -159,11 +161,10 @@ def _process_string(cls, string):
159161
multiline = True
160162
ml.append(line[1:].strip())
161163
else:
162-
for s in p.findall(line):
163-
# s is tuple. location of the data in the tuple
164-
# depends on whether it was quoted in the input
165-
q.append(tuple(s))
166-
return q
164+
for string in pattern.findall(line):
165+
# location of the data in string depends on whether it was quoted in the input
166+
deq.append(tuple(string))
167+
return deq
167168

168169
@classmethod
169170
def from_str(cls, string):
@@ -216,7 +217,7 @@ def from_str(cls, string):
216217
class CifFile:
217218
"""Reads and parses CifBlocks from a .cif file or string."""
218219

219-
def __init__(self, data, orig_string=None, comment=None):
220+
def __init__(self, data: dict, orig_string: str | None = None, comment: str | None = None) -> None:
220221
"""
221222
Args:
222223
data (dict): Of CifBlock objects.
@@ -250,15 +251,15 @@ def from_str(cls, string) -> CifFile:
250251
# CifParser was also not parsing it.
251252
if "powder_pattern" in re.split(r"\n", block_str, maxsplit=1)[0]:
252253
continue
253-
block = CifBlock.from_str("data_" + block_str)
254+
block = CifBlock.from_str(f"data_{block_str}")
254255
# TODO (@janosh, 2023-10-11) multiple CIF blocks with equal header will overwrite each other,
255256
# latest taking precedence. maybe something to fix and test e.g. in test_cif_writer_write_file
256257
dct[block.header] = block
257258

258259
return cls(dct, string)
259260

260261
@classmethod
261-
def from_file(cls, filename):
262+
def from_file(cls, filename: str | Path) -> CifFile:
262263
"""
263264
Reads CifFile from a filename.
264265
@@ -273,9 +274,8 @@ def from_file(cls, filename):
273274

274275
class CifParser:
275276
"""
276-
Parses a CIF file. Attempts to fix CIFs that are out-of-spec, but will
277-
issue warnings if corrections applied. These are also stored in the
278-
CifParser's errors attribute.
277+
Parses a CIF file. Attempts to fix CIFs that are out-of-spec, but will issue warnings
278+
if corrections are applied. These are also stored in the CifParser's errors attribute.
279279
"""
280280

281281
def __init__(
@@ -285,7 +285,7 @@ def __init__(
285285
site_tolerance: float = 1e-4,
286286
frac_tolerance: float = 1e-4,
287287
check_cif: bool = True,
288-
cif_assessor_tol: float = 0.01,
288+
comp_tol: float = 0.01,
289289
) -> None:
290290
"""
291291
Args:
@@ -299,7 +299,10 @@ def __init__(
299299
However, for very large CIF files, this may need to be set to 0.
300300
check_cif (bool): Whether to check that stoichiometry reported in CIF matches
301301
that of resulting Structure, and whether elements are missing. Defaults to True.
302-
cif_assessor_tol (float): Tolerance for how closely stoichiometries should match. Defaults to 0.01.
302+
comp_tol (float): Tolerance for how closely stoichiometries of CIF file and pymatgen should match.
303+
Defaults to 0.01. Context: Experimental CIF files often don't report hydrogen positions because they are
304+
hard to locate with X-rays. pymatgen warns if the stoichiometry of the CIF file and the Structure
305+
don't match to within comp_tol.
303306
"""
304307
self._occupancy_tolerance = occupancy_tolerance
305308
self._site_tolerance = site_tolerance
@@ -312,7 +315,7 @@ def __init__(
312315
# options related to checking CIFs for missing elements
313316
# or incorrect stoichiometries
314317
self.check_cif = check_cif
315-
self.cif_assessor_tol = cif_assessor_tol
318+
self.comp_tol = comp_tol
316319

317320
# store if CIF contains features from non-core CIF dictionaries
318321
# e.g. magCIF
@@ -1371,7 +1374,7 @@ def check(self, structure: Structure) -> str | None:
13711374
ratios = {elt: struct_comp[elt] / orig_comp[elt] for elt in orig_comp_elts}
13721375

13731376
same_stoich = all(
1374-
abs(ratios[elt_a] - ratios[elt_b]) < self.cif_assessor_tol
1377+
abs(ratios[elt_a] - ratios[elt_b]) < self.comp_tol
13751378
for elt_a in orig_comp_elts
13761379
for elt_b in orig_comp_elts
13771380
)

0 commit comments

Comments
 (0)