Skip to content

Commit 8d6d337

Browse files
authored
[Breaking] Fix valence electron configuration parsing for PotcarSingle.electron_configuration (#4278)
* add types * make module-level private variables all caps * clarify valence electron config * use symbol over sym as sym could be mistaken for symmetry? * Revert "use symbol over sym as sym could be mistaken for symmetry?" This reverts commit 66da01c. * remove unnecessary type ignore * enhance unit test and add scrambled POTCARs * use explicit None check instead of truthiness * fix fake POTCAR position * rewrite electron config parser * avoid ambiguous variable name `l` * less explicit tuple as I'm unable to cast type * replace K_sv with Ca_sv as it seems to interfere with the LOBSTER test * drop unnecessary temp config recording * add occu_cutoff * add test for POT_PAW_PBE_64 * also change Species full_electronic_structure property * implement tolerance
1 parent 190c3e7 commit 8d6d337

File tree

10 files changed

+250
-68
lines changed

10 files changed

+250
-68
lines changed

dev_scripts/potcar_scrambler.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222
class PotcarScrambler:
2323
"""
24-
Takes a POTCAR and replaces its values with completely random values
24+
Takes a POTCAR and replaces its values with completely random values.
2525
Does type matching and attempts precision matching on floats to ensure
2626
file is read correctly by Potcar and PotcarSingle classes.
2727
@@ -40,14 +40,15 @@ class PotcarScrambler:
4040

4141
def __init__(self, potcars: Potcar | PotcarSingle) -> None:
4242
self.PSP_list = [potcars] if isinstance(potcars, PotcarSingle) else potcars
43-
self.scrambled_potcars_str = ""
43+
self.scrambled_potcars_str: str = ""
4444
for psp in self.PSP_list:
4545
scrambled_potcar_str = self.scramble_single_potcar(psp)
4646
self.scrambled_potcars_str += scrambled_potcar_str
4747

4848
def _rand_float_from_str_with_prec(self, input_str: str, bloat: float = 1.5) -> float:
49-
n_prec = len(input_str.split(".")[1])
50-
bd = max(1, bloat * abs(float(input_str))) # ensure we don't get 0
49+
"""Generate a random float from str to replace true values."""
50+
n_prec: int = len(input_str.split(".")[1])
51+
bd: float = max(1.0, bloat * abs(float(input_str))) # ensure we don't get 0
5152
return round(bd * np.random.default_rng().random(), n_prec)
5253

5354
def _read_fortran_str_and_scramble(self, input_str: str, bloat: float = 1.5):
@@ -124,14 +125,16 @@ def scramble_single_potcar(self, potcar: PotcarSingle) -> str:
124125
return scrambled_potcar_str
125126

126127
def to_file(self, filename: str) -> None:
128+
"""Write scrambled POTCAR to file."""
127129
with zopen(filename, mode="wt", encoding="utf-8") as file:
128130
file.write(self.scrambled_potcars_str)
129131

130132
@classmethod
131133
def from_file(cls, input_filename: str, output_filename: str | None = None) -> Self:
134+
"""Read a POTCAR from file and generate a scrambled version."""
132135
psp = Potcar.from_file(input_filename)
133136
psp_scrambled = cls(psp)
134-
if output_filename:
137+
if output_filename is not None:
135138
psp_scrambled.to_file(output_filename)
136139
return psp_scrambled
137140

src/pymatgen/core/periodic_table.py

Lines changed: 62 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,14 @@
3333

3434
from pymatgen.util.typing import SpeciesLike
3535

36-
# Load element data from JSON file
36+
# Load element data (periodic table) from JSON file
3737
with open(Path(__file__).absolute().parent / "periodic_table.json", encoding="utf-8") as ptable_json:
38-
_pt_data = json.load(ptable_json)
38+
_PT_DATA: dict = json.load(ptable_json)
3939

40-
_pt_row_sizes = (2, 8, 8, 18, 18, 32, 32)
40+
_PT_ROW_SIZES: tuple[int, ...] = (2, 8, 8, 18, 18, 32, 32)
4141

42-
_madelung = [
42+
# Madelung energy ordering rule (lower to higher energy)
43+
_MADELUNG: list[tuple[int, str]] = [
4344
(1, "s"),
4445
(2, "s"),
4546
(2, "p"),
@@ -137,21 +138,21 @@ def __init__(self, symbol: SpeciesLike) -> None:
137138
Solid State Communications, 1984.
138139
"""
139140
self.symbol = str(symbol)
140-
data = _pt_data[symbol]
141+
data = _PT_DATA[symbol]
141142

142143
# Store key variables for quick access
143144
self.Z = data["Atomic no"]
144145

145146
self._is_named_isotope = data.get("Is named isotope", False)
146147
if self._is_named_isotope:
147-
for sym in _pt_data:
148-
if _pt_data[sym]["Atomic no"] == self.Z and not _pt_data[sym].get("Is named isotope", False):
148+
for sym, info in _PT_DATA.items():
149+
if info["Atomic no"] == self.Z and not info.get("Is named isotope", False):
149150
self.symbol = sym
150151
break
151152
# For specified/named isotopes, treat the same as named element
152153
# (the most common isotope). Then we pad the data block with the
153154
# entries for the named element.
154-
data = {**_pt_data[self.symbol], **data}
155+
data = {**_PT_DATA[self.symbol], **data}
155156

156157
at_r = data.get("Atomic radius", "no data")
157158
if str(at_r).startswith("no data"):
@@ -452,33 +453,48 @@ def icsd_oxidation_states(self) -> tuple[int, ...]:
452453

453454
@property
454455
def full_electronic_structure(self) -> list[tuple[int, str, int]]:
455-
"""Full electronic structure as list of tuples, in order of increasing
456+
"""Full electronic structure in order of increasing
456457
energy level (according to the Madelung rule). Therefore, the final
457458
element in the list gives the electronic structure of the valence shell.
458459
459-
For example, the electronic structure for Fe is represented as:
460-
[(1, "s", 2), (2, "s", 2), (2, "p", 6), (3, "s", 2), (3, "p", 6),
461-
(4, "s", 2), (3, "d", 6)].
460+
For example, the full electronic structure for Fe is:
461+
[(1, "s", 2), (2, "s", 2), (2, "p", 6), (3, "s", 2), (3, "p", 6),
462+
(4, "s", 2), (3, "d", 6)].
462463
463464
References:
464465
Kramida, A., Ralchenko, Yu., Reader, J., and NIST ASD Team (2023). NIST
465466
Atomic Spectra Database (ver. 5.11). https://physics.nist.gov/asd [2024,
466467
June 3]. National Institute of Standards and Technology, Gaithersburg,
467468
MD. DOI: https://doi.org/10.18434/T4W30F
469+
470+
Returns:
471+
list[tuple[int, str, int]]: A list of tuples representing each subshell,
472+
where each tuple contains:
473+
- `n` (int): Principal quantum number.
474+
- `orbital_type` (str): Orbital type (e.g., "s", "p", "d", "f").
475+
- `electron_count` (int): Number of electrons in the subshell.
468476
"""
469-
e_str = self.electronic_structure
477+
e_str: str = self.electronic_structure
470478

471-
def parse_orbital(orb_str):
479+
def parse_orbital(orb_str: str) -> str | tuple[int, str, int]:
480+
"""Parse orbital information from split electron configuration string."""
481+
# Parse valence subshell notation (e.g., "3d6" -> (3, "d", 6))
472482
if match := re.match(r"(\d+)([spdfg]+)(\d+)", orb_str):
473483
return int(match[1]), match[2], int(match[3])
484+
485+
# Return core-electron configuration as-is (e.g. "[Ar]")
474486
return orb_str
475487

476-
data = [parse_orbital(s) for s in e_str.split(".")]
477-
if data[0][0] == "[":
478-
sym = data[0].replace("[", "").replace("]", "")
488+
# Split e_str (e.g. for Fe "[Ar].3d6.4s2" into ["[Ar]", "3d6", "4s2"])
489+
data: list = [parse_orbital(s) for s in e_str.split(".")]
490+
491+
# Fully expand core-electron configuration (replace noble gas notation string)
492+
if isinstance(data[0], str):
493+
sym: str = data[0].replace("[", "").replace("]", "")
479494
data = list(Element(sym).full_electronic_structure) + data[1:]
480-
# sort the final electronic structure by increasing energy level
481-
return sorted(data, key=lambda x: _madelung.index((x[0], x[1])))
495+
496+
# Sort the final electronic structure by increasing energy level
497+
return sorted(data, key=lambda x: _MADELUNG.index((x[0], x[1])))
482498

483499
@property
484500
def n_electrons(self) -> int:
@@ -563,7 +579,7 @@ def ground_state_term_symbol(self) -> str:
563579
L_symbols = "SPDFGHIKLMNOQRTUVWXYZ"
564580

565581
term_symbols = self.term_symbols
566-
term_symbol_flat = { # type: ignore[var-annotated]
582+
term_symbol_flat: dict = {
567583
term: {
568584
"multiplicity": int(term[0]),
569585
"L": L_symbols.index(term[1]),
@@ -595,7 +611,7 @@ def from_Z(Z: int, A: int | None = None) -> Element:
595611
Returns:
596612
Element with atomic number Z.
597613
"""
598-
for sym, data in _pt_data.items():
614+
for sym, data in _PT_DATA.items():
599615
atomic_mass_num = data.get("Atomic mass no") if A else None
600616
if data["Atomic no"] == Z and atomic_mass_num == A:
601617
return Element(sym)
@@ -616,7 +632,7 @@ def from_name(name: str) -> Element:
616632
uk_to_us = {"aluminium": "aluminum", "caesium": "cesium"}
617633
name = uk_to_us.get(name.lower(), name)
618634

619-
for sym, data in _pt_data.items():
635+
for sym, data in _PT_DATA.items():
620636
if data["Name"] == name.capitalize():
621637
return Element(sym)
622638

@@ -643,7 +659,7 @@ def from_row_and_group(row: int, group: int) -> Element:
643659
Note:
644660
The 18 group number system is used, i.e. noble gases are group 18.
645661
"""
646-
for sym in _pt_data:
662+
for sym in _PT_DATA:
647663
el = Element(sym)
648664
if 57 <= el.Z <= 71:
649665
el_pseudo_row = 8
@@ -683,7 +699,7 @@ def row(self) -> int:
683699
return 6
684700
if 89 <= z <= 103:
685701
return 7
686-
for idx, size in enumerate(_pt_row_sizes, start=1):
702+
for idx, size in enumerate(_PT_ROW_SIZES, start=1):
687703
total += size
688704
if total >= z:
689705
return idx
@@ -1161,33 +1177,45 @@ def electronic_structure(self) -> str:
11611177
# robustness
11621178
@property
11631179
def full_electronic_structure(self) -> list[tuple[int, str, int]]:
1164-
"""Full electronic structure as list of tuples, in order of increasing
1180+
"""Full electronic structure in order of increasing
11651181
energy level (according to the Madelung rule). Therefore, the final
11661182
element in the list gives the electronic structure of the valence shell.
11671183
1168-
For example, the electronic structure for Fe+2 is represented as:
1169-
[(1, "s", 2), (2, "s", 2), (2, "p", 6), (3, "s", 2), (3, "p", 6),
1170-
(3, "d", 6)].
1184+
For example, the full electronic structure for Fe is:
1185+
[(1, "s", 2), (2, "s", 2), (2, "p", 6), (3, "s", 2), (3, "p", 6),
1186+
(4, "s", 2), (3, "d", 6)].
11711187
11721188
References:
11731189
Kramida, A., Ralchenko, Yu., Reader, J., and NIST ASD Team (2023). NIST
11741190
Atomic Spectra Database (ver. 5.11). https://physics.nist.gov/asd [2024,
11751191
June 3]. National Institute of Standards and Technology, Gaithersburg,
11761192
MD. DOI: https://doi.org/10.18434/T4W30F
1193+
1194+
Returns:
1195+
list[tuple[int, str, int]]: A list of tuples representing each subshell,
1196+
where each tuple contains:
1197+
- `n` (int): Principal quantum number.
1198+
- `orbital_type` (str): Orbital type (e.g., "s", "p", "d", "f").
1199+
- `electron_count` (int): Number of electrons in the subshell.
11771200
"""
1178-
e_str = self.electronic_structure
1201+
e_str: str = self.electronic_structure
11791202

1180-
def parse_orbital(orb_str):
1203+
def parse_orbital(orb_str: str) -> str | tuple[int, str, int]:
1204+
"""Parse orbital information from split electron configuration string."""
1205+
# Parse valence subshell notation (e.g., "3d6" -> (3, "d", 6))
11811206
if match := re.match(r"(\d+)([spdfg]+)(\d+)", orb_str):
11821207
return int(match[1]), match[2], int(match[3])
1208+
1209+
# Return core-electron configuration as-is (e.g. "[Ar]")
11831210
return orb_str
11841211

1185-
data = [parse_orbital(s) for s in e_str.split(".")]
1186-
if data[0][0] == "[":
1212+
data: list = [parse_orbital(s) for s in e_str.split(".")]
1213+
if isinstance(data[0], str):
11871214
sym = data[0].replace("[", "").replace("]", "")
11881215
data = list(Element(sym).full_electronic_structure) + data[1:]
1189-
# sort the final electronic structure by increasing energy level
1190-
return sorted(data, key=lambda x: _madelung.index((x[0], x[1])))
1216+
1217+
# Sort the final electronic structure by increasing energy level
1218+
return sorted(data, key=lambda x: _MADELUNG.index((x[0], x[1])))
11911219

11921220
# NOTE - copied exactly from Element. Refactoring / inheritance may improve
11931221
# robustness

src/pymatgen/io/vasp/inputs.py

Lines changed: 68 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2126,25 +2126,70 @@ def __repr__(self) -> str:
21262126
TITEL, VRHFIN, n_valence_elec = (self.keywords.get(key) for key in ("TITEL", "VRHFIN", "ZVAL"))
21272127
return f"{cls_name}({symbol=}, {functional=}, {TITEL=}, {VRHFIN=}, {n_valence_elec=:.0f})"
21282128

2129+
def get_electron_configuration(
2130+
self,
2131+
tol: float = 0.01,
2132+
) -> list[tuple[int, str, float]]:
2133+
"""Valence electronic configuration corresponding to the ZVAL,
2134+
read from the "Atomic configuration" section of POTCAR.
2135+
2136+
Args:
2137+
tol (float): Tolerance for occupation numbers.
2138+
- Orbitals with an occupation below `tol` are considered empty.
2139+
- Accumulation of electrons stops once the total occupation
2140+
reaches `ZVAL - tol`, preventing unnecessary additions.
2141+
2142+
Returns:
2143+
list[tuple[int, str, float]]: A list of tuples containing:
2144+
- n (int): Principal quantum number.
2145+
- subshell (str): Subshell notation (s, p, d, f, g, h).
2146+
- occ (float): Occupation number, limited to ZVAL.
2147+
"""
2148+
# Find "Atomic configuration" section
2149+
match = re.search(r"Atomic configuration", self.data)
2150+
if match is None:
2151+
raise RuntimeError("Cannot find atomic configuration section in POTCAR.")
2152+
2153+
start_idx: int = self.data[: match.start()].count("\n")
2154+
2155+
lines = self.data.splitlines()
2156+
2157+
# Extract all subshells
2158+
match_entries = re.search(r"(\d+)\s+entries", lines[start_idx + 1])
2159+
if match_entries is None:
2160+
raise RuntimeError("Cannot find entries in POTCAR.")
2161+
num_entries: int = int(match_entries.group(1))
2162+
2163+
# Get valence electron configuration (defined by ZVAL)
2164+
l_map: dict[int, str] = {0: "s", 1: "p", 2: "d", 3: "f", 4: "g", 5: "h"}
2165+
2166+
total_electrons = 0.0
2167+
valence_config: list[tuple[int, str, float]] = []
2168+
for line in lines[start_idx + 2 + num_entries : start_idx + 2 : -1]:
2169+
parts = line.split()
2170+
n, ang_moment, _j, _E, occ = int(parts[0]), int(parts[1]), float(parts[2]), float(parts[3]), float(parts[4])
2171+
2172+
if occ >= tol:
2173+
valence_config.append((n, l_map[ang_moment], occ))
2174+
total_electrons += occ
2175+
2176+
if total_electrons >= self.zval - tol:
2177+
break
2178+
2179+
return list(reversed(valence_config))
2180+
21292181
@property
2130-
def electron_configuration(self) -> list[tuple[int, str, int]] | None:
2131-
"""Electronic configuration of the PotcarSingle."""
2132-
if not self.nelectrons.is_integer():
2133-
warnings.warn(
2134-
"POTCAR has non-integer charge, electron configuration not well-defined.",
2135-
stacklevel=2,
2136-
)
2137-
return None
2138-
2139-
el = Element.from_Z(self.atomic_no)
2140-
full_config = el.full_electronic_structure
2141-
nelect = self.nelectrons
2142-
config = []
2143-
while nelect > 0:
2144-
e = full_config.pop(-1)
2145-
config.append(e)
2146-
nelect -= e[-1]
2147-
return config
2182+
def electron_configuration(self) -> list[tuple[int, str, float]]:
2183+
"""Valence electronic configuration corresponding to the ZVAL,
2184+
read from the "Atomic configuration" section of POTCAR.
2185+
2186+
Returns:
2187+
list[tuple[int, str, float]]: A list of tuples containing:
2188+
- n (int): Principal quantum number.
2189+
- subshell (str): Subshell notation (s, p, d, f, g, h).
2190+
- occ (float): Occupation number, limited to ZVAL.
2191+
"""
2192+
return self.get_electron_configuration()
21482193

21492194
@property
21502195
def element(self) -> str:
@@ -2763,7 +2808,7 @@ def _gen_potcar_summary_stats(
27632808
}
27642809
)
27652810

2766-
if summary_stats_filename:
2811+
if summary_stats_filename is not None:
27672812
dumpfn(new_summary_stats, summary_stats_filename)
27682813

27692814
return new_summary_stats
@@ -2892,16 +2937,16 @@ def set_symbols(
28922937
functional (str): The functional to use. If None, the setting
28932938
PMG_DEFAULT_FUNCTIONAL in .pmgrc.yaml is used, or if this is
28942939
not set, it will default to PBE.
2895-
sym_potcar_map (dict): A map of symbol:raw POTCAR string. If
2940+
sym_potcar_map (dict): A map of symbol to raw POTCAR string. If
28962941
sym_potcar_map is specified, POTCARs will be generated from
28972942
the given map data rather than the config file location.
28982943
"""
28992944
del self[:]
29002945

2901-
if sym_potcar_map:
2902-
self.extend(PotcarSingle(sym_potcar_map[el]) for el in symbols)
2903-
else:
2946+
if sym_potcar_map is None:
29042947
self.extend(PotcarSingle.from_symbol_and_functional(el, functional) for el in symbols)
2948+
else:
2949+
self.extend(PotcarSingle(sym_potcar_map[el]) for el in symbols)
29052950

29062951

29072952
class UnknownPotcarWarning(UserWarning):

src/pymatgen/io/vasp/sets.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3521,7 +3521,8 @@ def _combine_kpoints(*kpoints_objects: Kpoints | None) -> Kpoints:
35213521
_kpoints: list[Sequence[Kpoint]] = []
35223522
_weights = []
35233523

3524-
for kpoints_object in filter(None, kpoints_objects): # type: ignore[var-annotated]
3524+
kpoints_object: Kpoints
3525+
for kpoints_object in filter(None, kpoints_objects):
35253526
if kpoints_object.style != Kpoints.supported_modes.Reciprocal:
35263527
raise ValueError("Can only combine kpoints with style=Kpoints.supported_modes.Reciprocal")
35273528
if kpoints_object.labels is None:
86.5 KB
Binary file not shown.
104 KB
Binary file not shown.
119 KB
Binary file not shown.
69.2 KB
Binary file not shown.
80.2 KB
Binary file not shown.

0 commit comments

Comments
 (0)