@@ -61,8 +61,7 @@ def __init__(self, data, loops, header):
61
61
"""
62
62
self .loops = loops
63
63
self .data = data
64
- # AJ (@computron) says: CIF Block names cannot be more than 75 characters or you
65
- # get an Exception
64
+ # AJ (@computron) says: CIF Block names cannot be more than 75 characters or you get an Exception
66
65
self .header = header [:74 ]
67
66
68
67
def __eq__ (self , other : object ) -> bool :
@@ -73,7 +72,7 @@ def __eq__(self, other: object) -> bool:
73
72
def __getitem__ (self , key ):
74
73
return self .data [key ]
75
74
76
- def __str__ (self ):
75
+ def __str__ (self ) -> str :
77
76
"""Returns the cif string for the data block."""
78
77
out = [f"data_{ self .header } " ]
79
78
keys = list (self .data )
@@ -114,17 +113,21 @@ def _loop_to_str(self, loop):
114
113
out += line
115
114
return out
116
115
117
- def _format_field (self , v ) :
118
- v = str (v ).strip ()
119
- if len (v ) > self .max_len :
120
- return ";\n " + textwrap .fill (v , self .max_len ) + "\n ;"
116
+ def _format_field (self , val ) -> str :
117
+ val = str (val ).strip ()
118
+ if len (val ) > self .max_len :
119
+ return ";\n " + textwrap .fill (val , self .max_len ) + "\n ;"
121
120
# add quotes if necessary
122
- if v == "" :
121
+ if val == "" :
123
122
return '""'
124
- if (" " in v or v [0 ] == "_" ) and not (v [0 ] == "'" and v [- 1 ] == "'" ) and not (v [0 ] == '"' and v [- 1 ] == '"' ):
125
- q = '"' if "'" in v else "'"
126
- v = q + v + q
127
- return v
123
+ if (
124
+ (" " in val or val [0 ] == "_" )
125
+ and not (val [0 ] == "'" and val [- 1 ] == "'" )
126
+ and not (val [0 ] == '"' and val [- 1 ] == '"' )
127
+ ):
128
+ quote = '"' if "'" in val else "'"
129
+ val = quote + val + quote
130
+ return val
128
131
129
132
@classmethod
130
133
def _process_string (cls , string ):
@@ -137,19 +140,18 @@ def _process_string(cls, string):
137
140
# since line breaks in .cif files are mostly meaningless,
138
141
# break up into a stream of tokens to parse, rejoining multiline
139
142
# strings (between semicolons)
140
- q = deque ()
143
+ deq = deque ()
141
144
multiline = False
142
145
ml = []
143
- # this regex splits on spaces, except when in quotes.
144
- # starting quotes must not be preceded by non-whitespace
145
- # (these get eaten by the first expression)
146
- # ending quotes must not be followed by non-whitespace
147
- p = re .compile (r"""([^'"\s][\S]*)|'(.*?)'(?!\S)|"(.*?)"(?!\S)""" )
146
+ # this regex splits on spaces, except when in quotes. starting quotes must not be
147
+ # preceded by non-whitespace (these get eaten by the first expression). ending
148
+ # quotes must not be followed by non-whitespace
149
+ pattern = re .compile (r"""([^'"\s][\S]*)|'(.*?)'(?!\S)|"(.*?)"(?!\S)""" )
148
150
for line in string .splitlines ():
149
151
if multiline :
150
152
if line .startswith (";" ):
151
153
multiline = False
152
- q .append (("" , "" , "" , " " .join (ml )))
154
+ deq .append (("" , "" , "" , " " .join (ml )))
153
155
ml = []
154
156
line = line [1 :].strip ()
155
157
else :
@@ -159,11 +161,10 @@ def _process_string(cls, string):
159
161
multiline = True
160
162
ml .append (line [1 :].strip ())
161
163
else :
162
- for s in p .findall (line ):
163
- # s is tuple. location of the data in the tuple
164
- # depends on whether it was quoted in the input
165
- q .append (tuple (s ))
166
- return q
164
+ for string in pattern .findall (line ):
165
+ # each match is a tuple; the position of the data in it depends on whether it was quoted in the input
166
+ deq .append (tuple (string ))
167
+ return deq
167
168
168
169
@classmethod
169
170
def from_str (cls , string ):
@@ -216,7 +217,7 @@ def from_str(cls, string):
216
217
class CifFile :
217
218
"""Reads and parses CifBlocks from a .cif file or string."""
218
219
219
- def __init__ (self , data , orig_string = None , comment = None ):
220
+ def __init__ (self , data : dict , orig_string : str | None = None , comment : str | None = None ) -> None :
220
221
"""
221
222
Args:
222
223
data (dict): Of CifBlock objects.
@@ -250,15 +251,15 @@ def from_str(cls, string) -> CifFile:
250
251
# CifParser was also not parsing it.
251
252
if "powder_pattern" in re .split (r"\n" , block_str , maxsplit = 1 )[0 ]:
252
253
continue
253
- block = CifBlock .from_str ("data_" + block_str )
254
+ block = CifBlock .from_str (f "data_{ block_str } " )
254
255
# TODO (@janosh, 2023-10-11) multiple CIF blocks with equal header will overwrite each other,
255
256
# latest taking precedence. maybe something to fix and test e.g. in test_cif_writer_write_file
256
257
dct [block .header ] = block
257
258
258
259
return cls (dct , string )
259
260
260
261
@classmethod
261
- def from_file (cls , filename ) :
262
+ def from_file (cls , filename : str | Path ) -> CifFile :
262
263
"""
263
264
Reads CifFile from a filename.
264
265
@@ -273,9 +274,8 @@ def from_file(cls, filename):
273
274
274
275
class CifParser :
275
276
"""
276
- Parses a CIF file. Attempts to fix CIFs that are out-of-spec, but will
277
- issue warnings if corrections applied. These are also stored in the
278
- CifParser's errors attribute.
277
+ Parses a CIF file. Attempts to fix CIFs that are out-of-spec, but will issue warnings
278
+ if corrections are applied. These are also stored in the CifParser's errors attribute.
279
279
"""
280
280
281
281
def __init__ (
@@ -285,7 +285,7 @@ def __init__(
285
285
site_tolerance : float = 1e-4 ,
286
286
frac_tolerance : float = 1e-4 ,
287
287
check_cif : bool = True ,
288
- cif_assessor_tol : float = 0.01 ,
288
+ comp_tol : float = 0.01 ,
289
289
) -> None :
290
290
"""
291
291
Args:
@@ -299,7 +299,10 @@ def __init__(
299
299
However, for very large CIF files, this may need to be set to 0.
300
300
check_cif (bool): Whether to check that stoichiometry reported in CIF matches
301
301
that of resulting Structure, and whether elements are missing. Defaults to True.
302
- cif_assessor_tol (float): Tolerance for how closely stoichiometries should match. Defaults to 0.01.
302
+ comp_tol (float): Tolerance for how closely stoichiometries of the CIF file and the parsed Structure should match.
303
+ Defaults to 0.01. Context: Experimental CIF files often don't report hydrogen positions due to being
304
+ hard-to-locate with X-rays. pymatgen warns if the stoichiometry of the CIF file and the Structure
305
+ don't match to within comp_tol.
303
306
"""
304
307
self ._occupancy_tolerance = occupancy_tolerance
305
308
self ._site_tolerance = site_tolerance
@@ -312,7 +315,7 @@ def __init__(
312
315
# options related to checking CIFs for missing elements
313
316
# or incorrect stoichiometries
314
317
self .check_cif = check_cif
315
- self .cif_assessor_tol = cif_assessor_tol
318
+ self .comp_tol = comp_tol
316
319
317
320
# store if CIF contains features from non-core CIF dictionaries
318
321
# e.g. magCIF
@@ -1371,7 +1374,7 @@ def check(self, structure: Structure) -> str | None:
1371
1374
ratios = {elt : struct_comp [elt ] / orig_comp [elt ] for elt in orig_comp_elts }
1372
1375
1373
1376
same_stoich = all (
1374
- abs (ratios [elt_a ] - ratios [elt_b ]) < self .cif_assessor_tol
1377
+ abs (ratios [elt_a ] - ratios [elt_b ]) < self .comp_tol
1375
1378
for elt_a in orig_comp_elts
1376
1379
for elt_b in orig_comp_elts
1377
1380
)
0 commit comments