Skip to content

Commit f463ac1

Browse files
Improve CIF checking, support for isotopes, and correct handling of new VASP 6.4.2 POSCAR format incl. slashes in header (#3542)
* Add CIF checking for incorrect/missing elements, support for isotopes (primarily Deuterium and Tritium), and correct handling of new VASP POSCAR formats
* Fix LAMMPS guess_element
* Refactor CifAssessor to be class method on CifParser + refactor assoc tests
* Fix failing test by adding DummySpecies.A attr
* Fix oxidation state parsing for isotopes
* Revert element hash to be Z if not an isotope, and add new distinct hashes for isotopes
* fix types in doc strings and tweak var names
* fix DummySpecies.A attr, refactor __eq__
* CifParser document defaults for check_cif, cif_assessor_tol, fix doc str format
* rename method CifParser.assess->check
* check D+ oxi state is correctly parsed
* tighten atomic mass test on H isotopes
* del debug prints
* merge TestCifParserAssess into TestCifIO
* Update docstring for DummySpecies.Z and DummySpecies.A; allow DummySpecies.A to return either int or None
* Revert format of dev_scripts/periodic_table.yaml to minimize diff
---------
Co-authored-by: Janosh Riebesell <[email protected]>
1 parent 181c365 commit f463ac1

File tree

14 files changed

+358
-85
lines changed

14 files changed

+358
-85
lines changed

dev_scripts/periodic_table.yaml

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5435,3 +5435,26 @@ Zr:
54355435
Vickers hardness: 903 MN m<sup>-2</sup>
54365436
X: 1.33
54375437
Youngs modulus: 68 GPa
5438+
D:
5439+
Atomic no: 1
5440+
Atomic mass: 2.013553212712
5441+
Atomic mass no: 2
5442+
Common oxidation states: [-1, 1]
5443+
Is named isotope: true
5444+
Name: Deuterium
5445+
Oxidation states: [-1, 1]
5446+
Shannon radii:
5447+
1:
5448+
II:
5449+
? ''
5450+
: {crystal_radius: 0.04, ionic_radius: -0.1}
5451+
Electron affinity: 0.754674
5452+
T:
5453+
Atomic no: 1
5454+
Atomic mass: 3.0155007134
5455+
Atomic mass no: 3
5456+
Common oxidation states: [-1, 1]
5457+
Is named isotope: true
5458+
Name: Tritium
5459+
Oxidation states: [-1, 1]
5460+
Electron affinity:

dev_scripts/update_pt_data.py

Lines changed: 52 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -19,22 +19,11 @@
1919

2020
from pymatgen.core import Element, get_el_sp
2121

22-
23-
def test_yaml():
24-
with open("periodic_table.yaml") as file:
25-
data = yaml.load(file)
26-
print(data)
27-
28-
29-
def test_json():
30-
with open("periodic_table.json") as file:
31-
data = json.load(file)
32-
print(data)
22+
ptable_yaml_path = "periodic_table.yaml"
3323

3424

3525
def parse_oxi_state():
36-
with open("periodic_table.yaml") as file:
37-
data = yaml.load(file)
26+
data = loadfn(ptable_yaml_path)
3827
with open("oxidation_states.txt") as file:
3928
oxi_data = file.read()
4029
oxi_data = re.sub("[\n\r]", "", oxi_data)
@@ -72,10 +61,9 @@ def parse_oxi_state():
7261

7362

7463
def parse_ionic_radii():
75-
with open("periodic_table.yaml") as f:
76-
data = yaml.load(f)
77-
with open("ionic_radii.csv") as f:
78-
radii_data = f.read()
64+
data = loadfn(ptable_yaml_path)
65+
with open("ionic_radii.csv") as file:
66+
radii_data = file.read()
7967
radii_data = radii_data.split("\r")
8068
header = radii_data[0].split(",")
8169
for idx in range(1, len(radii_data)):
@@ -99,15 +87,14 @@ def parse_ionic_radii():
9987
data[el]["Ionic_radii"] = ionic_radii
10088
else:
10189
print(el)
102-
with open("periodic_table2.yaml", "w") as f:
103-
yaml.dump(data, f)
90+
with open("periodic_table2.yaml", "w") as file:
91+
yaml.dump(data, file)
10492

10593

10694
def parse_radii():
107-
with open("periodic_table.yaml") as f:
108-
data = yaml.load(f)
109-
with open("radii.csv") as f:
110-
radii_data = f.read()
95+
data = loadfn(ptable_yaml_path)
96+
with open("radii.csv") as file:
97+
radii_data = file.read()
11198
radii_data = radii_data.split("\r")
11299

113100
for line in radii_data:
@@ -134,15 +121,14 @@ def parse_radii():
134121
data[el]["Van der waals radius"] = vdw_radii
135122
else:
136123
print(el)
137-
with open("periodic_table2.yaml", "w") as f:
138-
yaml.dump(data, f)
139-
with open("periodic_table.json", "w") as f:
140-
json.dump(data, f)
124+
with open("periodic_table2.yaml", "w") as file:
125+
yaml.dump(data, file)
126+
with open("../pymatgen/core/periodic_table.json", "w") as file:
127+
json.dump(data, file)
141128

142129

143130
def update_ionic_radii():
144-
with open("periodic_table.yaml") as f:
145-
data = yaml.load(f)
131+
data = loadfn(ptable_yaml_path)
146132

147133
for d in data.values():
148134
if "Ionic_radii" in d:
@@ -154,15 +140,14 @@ def update_ionic_radii():
154140
if "Ionic_radii_ls" in d:
155141
d["Ionic radii ls"] = {k: v / 100 for k, v in d["Ionic_radii_ls"].items()}
156142
del d["Ionic_radii_ls"]
157-
with open("periodic_table2.yaml", "w") as f:
158-
yaml.dump(data, f)
159-
with open("periodic_table.json", "w") as f:
160-
json.dump(data, f)
143+
with open("periodic_table2.yaml", "w") as file:
144+
yaml.dump(data, file)
145+
with open("../pymatgen/core/periodic_table.json", "w") as file:
146+
json.dump(data, file)
161147

162148

163149
def parse_shannon_radii():
164-
with open("periodic_table.yaml") as f:
165-
data = yaml.load(f)
150+
data = loadfn(ptable_yaml_path)
166151

167152
from openpyxl import load_workbook
168153

@@ -194,22 +179,20 @@ def parse_shannon_radii():
194179
if el in data:
195180
data[el]["Shannon radii"] = dict(radii[el])
196181

197-
with open("periodic_table.yaml", "w") as f:
198-
yaml.safe_dump(data, f)
199-
with open("periodic_table.json", "w") as f:
200-
json.dump(data, f)
182+
dumpfn(data, ptable_yaml_path)
183+
with open("../pymatgen/core/periodic_table.json", "w") as file:
184+
json.dump(data, file)
201185

202186

203187
def gen_periodic_table():
204-
with open("periodic_table.yaml") as f:
205-
data = yaml.load(f)
188+
data = loadfn(ptable_yaml_path)
206189

207-
with open("periodic_table.json", "w") as f:
208-
json.dump(data, f)
190+
with open("../pymatgen/core/periodic_table.json", "w") as file:
191+
json.dump(data, file)
209192

210193

211194
def gen_iupac_ordering():
212-
periodic_table = loadfn("periodic_table.json")
195+
periodic_table = loadfn("../pymatgen/core/periodic_table.json")
213196
order = [
214197
([18], range(6, 0, -1)), # noble gasses
215198
([1], range(7, 1, -1)), # alkali metals
@@ -265,26 +248,38 @@ def add_electron_affinities():
265248
row.append(td.get_text().strip())
266249
data.append(row)
267250
data.pop(0)
268-
ea = {int(r[0]): float(re.sub(r"[\s\(\)]", "", r[3].strip("()[]"))) for r in data}
269-
assert set(ea).issuperset(range(1, 93)) # Ensure that we have data for up to U.
251+
252+
ea = {}
253+
max_Z = max(Element(element).Z for element in Element.__members__)
254+
for r in data:
255+
# don't want superheavy elements or less common isotopes
256+
if int(r[0]) > max_Z or r[2] in ea:
257+
continue
258+
temp_str = re.sub(r"[\s\(\)]", "", r[3].strip("()[]"))
259+
# the scraped values use the Unicode minus sign (U+2212), which float() can't parse
260+
bytes_rep = temp_str.encode("unicode_escape").replace(b"\\u2212", b"-")
261+
ea[r[2]] = float(bytes_rep.decode("unicode_escape"))
262+
263+
Z_set = {Element.from_name(element).Z for element in ea}
264+
assert Z_set.issuperset(range(1, 93)) # Ensure that we have data for up to U.
270265
print(ea)
271266
pt = loadfn("../pymatgen/core/periodic_table.json")
272-
for k, v in pt.items():
273-
v["Electron affinity"] = ea.get(Element(k).Z)
267+
for key, val in pt.items():
268+
val["Electron affinity"] = ea.get(Element(key).long_name)
274269
dumpfn(pt, "../pymatgen/core/periodic_table.json")
275270

276271

277272
def add_ionization_energies():
278273
"""Update the periodic table data file with ground level and ionization energies from NIST."""
279274

280-
with open("NIST Atomic Ionization Energies Output.html") as f:
281-
soup = BeautifulSoup(f.read(), "html.parser")
282-
for t in soup.find_all("table"):
283-
if "Hydrogen" in t.text:
275+
with open("NIST Atomic Ionization Energies Output.html") as file:
276+
soup = BeautifulSoup(file.read(), "html.parser")
277+
for table in soup.find_all("table"):
278+
if "Hydrogen" in table.text:
284279
break
285280
data = collections.defaultdict(list)
286-
for tr in t.find_all("tr"):
287-
row = [td.get_text().strip() for td in tr.find_all("td")]
281+
for row in table.find_all("tr"):
282+
row = [td.get_text().strip() for td in row.find_all("td")]
288283
if row:
289284
Z = int(row[0])
290285
val = re.sub(r"\s", "", row[8].strip("()[]"))
@@ -294,9 +289,9 @@ def add_ionization_energies():
294289
print(data[51])
295290
assert set(data).issuperset(range(1, 93)) # Ensure that we have data for up to U.
296291
pt = loadfn("../pymatgen/core/periodic_table.json")
297-
for k, v in pt.items():
298-
del v["Ionization energy"]
299-
v["Ionization energies"] = data.get(Element(k).Z, [])
292+
for key, val in pt.items():
293+
del val["Ionization energy"]
294+
val["Ionization energies"] = data.get(Element(key).long_name, [])
300295
dumpfn(pt, "../pymatgen/core/periodic_table.json")
301296

302297

pymatgen/analysis/diffraction/core.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ def get_plot(
8888
two_theta_range (tuple[float, float]): Range of two_thetas to calculate in degrees.
8989
Defaults to (0, 90). Set to None if you want all diffracted beams within the limiting
9090
sphere of radius 2 / wavelength.
91-
annotate_peaks (str or None): Whether and how to annotate the peaks
91+
annotate_peaks (str | None): Whether and how to annotate the peaks
9292
with hkl indices. Default is 'compact', i.e. show short
9393
version (oriented vertically), e.g. 100. If 'full', show
9494
long version, e.g. (1, 0, 0). If None, do not show anything.
@@ -162,7 +162,7 @@ def show_plot(self, structure: Structure, **kwargs):
162162
two_thetas to calculate in degrees. Defaults to (0, 90). Set to
163163
None if you want all diffracted beams within the limiting
164164
sphere of radius 2 / wavelength.
165-
annotate_peaks (str or None): Whether and how to annotate the peaks
165+
annotate_peaks (str | None): Whether and how to annotate the peaks
166166
with hkl indices. Default is 'compact', i.e. show short
167167
version (oriented vertically), e.g. 100. If 'full', show
168168
long version, e.g. (1, 0, 0). If None, do not show anything.
@@ -180,7 +180,7 @@ def plot_structures(self, structures, fontsize=6, **kwargs):
180180
two_thetas to calculate in degrees. Defaults to (0, 90). Set to
181181
None if you want all diffracted beams within the limiting
182182
sphere of radius 2 / wavelength.
183-
annotate_peaks (str or None): Whether and how to annotate the peaks
183+
annotate_peaks (str | None): Whether and how to annotate the peaks
184184
with hkl indices. Default is 'compact', i.e. show short
185185
version (oriented vertically), e.g. 100. If 'full', show
186186
long version, e.g. (1, 0, 0). If None, do not show anything.

pymatgen/analysis/structure_matcher.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1092,9 +1092,9 @@ def get_transformation(self, struct1, struct2):
10921092
struct2 (Structure): Structure to transform.
10931093
10941094
Returns:
1095-
supercell (numpy.ndarray(3, 3)): supercell matrix
1096-
vector (numpy.ndarray(3)): fractional translation vector
1097-
mapping (list(int or None)):
1095+
supercell (np.array(3, 3)): supercell matrix
1096+
vector (np.array(3)): fractional translation vector
1097+
mapping (list[int | None]):
10981098
The first len(struct1) items of the mapping vector are the
10991099
indices of struct1's corresponding sites in struct2 (or None
11001100
if there is no corresponding site), and the other items are

pymatgen/core/periodic_table.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)