Skip to content

Commit 181c365

Browse files
esoteric-ephemerajanoshAaron Kaplan
authored
Improve handling of Vasprun POTCAR search, expanded fake POTCAR library for VASP I/O tests (#3491)
* Added fake potcar library for tests * fix failing lobster test caused by removal of potcars * fix failing io.vasp.test_sets * fix alchemy.test_materials test * Add pymatgen.util.testing.FAKE_POTCAR_DIR var for fake potcar library, rename directory of fake potcars * refactor POTCAR search path in Vasprun.get_potcars * test_inputs.py move monkey-patching of potcar_summary_stats into new _mock_complete_potcar_summary_stats fixture * remove largely duplicate asserts assert input_set.potcar.functional == "PBE_64" in TestMatPESStaticSet * minimize diff * restore comments * Modify POTCAR scrambling to only modify parts of the POTCAR not printed to OUTCAR. Revise test data and fake POTCAR library * Make potcar_summary_stats a private attr of pymatgen.io.vasp.inputs.PotcarSingle to resolve slow docs loading time * update doc strings, fix typo * add comment explaining FAKE_POTCAR_DIR contents --------- Signed-off-by: Aaron Kaplan <[email protected]> Co-authored-by: Janosh Riebesell <[email protected]> Co-authored-by: Aaron Kaplan <[email protected]>
1 parent c14d67e commit 181c365

File tree

74 files changed

+195
-6983
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

74 files changed

+195
-6983
lines changed

dev_scripts/potcar_scrambler.py

Lines changed: 64 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import os
44
import shutil
55
import warnings
6+
from glob import glob
67

78
import numpy as np
89
from monty.os.path import zpath
@@ -68,28 +69,59 @@ def _read_fortran_str_and_scramble(self, input_str: str, bloat: float = 1.5):
6869
return input_str
6970

7071
def scramble_single_potcar(self, potcar: PotcarSingle):
    """
    Scramble the body of a POTCAR while retaining the PSCTR header information.

    To the best of my (ADK) knowledge, almost all information from the POTCAR
    "PSCTR" block
    ```
    parameters from PSCTR are:
    ....
    END of PSCTR-controll parameters
    ```
    is printed to the OUTCAR file. Specifically, everything above the line
    `Error from kinetic energy argument (eV)` is included. Those header lines
    are copied through verbatim; scrambling starts at the first line that
    contains either marker string.

    Args:
        potcar (PotcarSingle): POTCAR whose ``data`` string is to be scrambled.

    Returns:
        str: The scrambled POTCAR text. " FAKE" is appended to any line
            containing "TITEL". Any line containing "SHA256" is replaced by a
            hash line recomputed from the scrambled text.
    """
    sha_placeholder = "SHA256 = None\n"
    start_markers = (
        "Error from kinetic energy argument (eV)",
        "END of PSCTR-controll parameters",
    )

    out_lines = []
    needs_sha256 = False
    scramble_values = False

    # NOTE: the final element of the split is dropped, i.e. the text after the
    # last newline (an empty string when the data ends with a newline).
    for line in potcar.data.split("\n")[:-1]:
        if "SHA256" in line:
            # The stored hash no longer applies; insert a placeholder that is
            # swapped for the recomputed hash once the full text is known.
            out_lines.append(sha_placeholder)
            needs_sha256 = True
            continue

        if any(marker in line for marker in start_markers):
            # The marker line itself is already passed through the scrambler.
            scramble_values = True

        if scramble_values:
            # Scramble token by token; ";" separates several entries on a line.
            new_line = "; ".join(
                " ".join(self._read_fortran_str_and_scramble(token) for token in segment.split())
                for segment in line.split(";")
            )
        else:
            # Header information is retained exactly as written in the POTCAR.
            new_line = line

        if "TITEL" in line:
            new_line += " FAKE"
        out_lines.append(f"{new_line}\n")

    scrambled_potcar_str = "".join(out_lines)

    if needs_sha256:
        # Recompute the hash from the scrambled contents so the fake POTCAR is
        # self-consistent.
        tps = PotcarSingle(scrambled_potcar_str)
        scrambled_potcar_str = scrambled_potcar_str.replace(
            sha_placeholder, f"SHA256 = {tps.sha256_computed_file_hash}\n"
        )
    return scrambled_potcar_str
94126

95127
def to_file(self, filename: str):
@@ -141,5 +173,28 @@ def generate_fake_potcar_libraries():
141173
break
142174

143175

176+
def potcar_cleanser():
    """
    Replace copyrighted POTCARs used in io.vasp.sets testing with dummy POTCARs
    that have scrambled PSP and kinetic energy values (but retain the original
    header information, which is also found in OUTCARs and freely shared by VASP).

    Every file matching ``POTCAR*`` below the ``real_potcars`` directory is
    scrambled and written to the same relative location below the sibling
    ``fake_potcars`` directory, with a ``.gz`` suffix added if not already
    present. Both directories are given relative to the current working directory.
    """
    search_dir = "../tests/files/fake_potcars/real_potcars/"
    rebase_dir = search_dir.replace("real", "fake")
    potcars_to_cleanse = glob(os.path.join(search_dir, "**", "POTCAR*"), recursive=True)

    for potcar in potcars_to_cleanse:
        if not os.path.isfile(potcar):
            # the pattern can also match directories named POTCAR*
            continue

        # Mirror the location relative to search_dir instead of splitting the
        # path on "POTCAR", which fails when that string occurs more than once.
        new_path = os.path.join(rebase_dir, os.path.relpath(potcar, search_dir))
        if not new_path.endswith(".gz"):
            new_path += ".gz"

        os.makedirs(os.path.dirname(new_path), exist_ok=True)
        PotcarScrambler.from_file(input_filename=potcar, output_filename=new_path)
196+
197+
144198
if __name__ == "__main__":
145-
generate_fake_potcar_libraries()
199+
potcar_cleanser()
200+
# generate_fake_potcar_libraries()

pymatgen/io/vasp/inputs.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1691,7 +1691,7 @@ class PotcarSingle:
16911691
)
16921692

16931693
# used for POTCAR validation
1694-
potcar_summary_stats = loadfn(POTCAR_STATS_PATH)
1694+
_potcar_summary_stats = loadfn(POTCAR_STATS_PATH)
16951695

16961696
def __init__(self, data: str, symbol: str | None = None) -> None:
16971697
"""
@@ -1975,7 +1975,7 @@ def identify_potcar(
19751975

19761976
identity: dict[str, list] = {"potcar_functionals": [], "potcar_symbols": []}
19771977
for func in self.functional_dir:
1978-
for ref_psp in self.potcar_summary_stats[func].get(self.TITEL.replace(" ", ""), []):
1978+
for ref_psp in self._potcar_summary_stats[func].get(self.TITEL.replace(" ", ""), []):
19791979
if self.VRHFIN.replace(" ", "") != ref_psp["VRHFIN"]:
19801980
continue
19811981

@@ -2243,9 +2243,9 @@ def is_valid(self) -> bool:
22432243
consistent values of LEXCH
22442244
"""
22452245
for func in self.functional_dir:
2246-
for titel_no_spc in self.potcar_summary_stats[func]:
2246+
for titel_no_spc in self._potcar_summary_stats[func]:
22472247
if self.TITEL.replace(" ", "") == titel_no_spc:
2248-
for potcar_subvariant in self.potcar_summary_stats[func][titel_no_spc]:
2248+
for potcar_subvariant in self._potcar_summary_stats[func][titel_no_spc]:
22492249
if self.VRHFIN.replace(" ", "") == potcar_subvariant["VRHFIN"]:
22502250
possible_potcar_matches.append(
22512251
{
@@ -2370,7 +2370,7 @@ def __repr__(self) -> str:
23702370

23712371

23722372
def _gen_potcar_summary_stats(
2373-
append: bool = False, vasp_psp_dir: str | None = None, summary_stats_filename: str = POTCAR_STATS_PATH
2373+
append: bool = False, vasp_psp_dir: str | None = None, summary_stats_filename: str | None = POTCAR_STATS_PATH
23742374
):
23752375
"""
23762376
This function is solely intended to be used for PMG development to regenerate the
@@ -2426,7 +2426,10 @@ def _gen_potcar_summary_stats(
24262426
}
24272427
)
24282428

2429-
dumpfn(new_summary_stats, summary_stats_filename)
2429+
if summary_stats_filename:
2430+
dumpfn(new_summary_stats, summary_stats_filename)
2431+
2432+
return new_summary_stats
24302433

24312434

24322435
class Potcar(list, MSONable):

pymatgen/io/vasp/outputs.py

Lines changed: 20 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -993,35 +993,34 @@ def get_potcars(self, path: str | Path) -> Potcar | None:
993993
Returns the POTCAR from the specified path.
994994
995995
Args:
996-
path (str): The path to search for POTCARs.
996+
path (str | Path): The path to search for POTCARs.
997997
998998
Returns:
999-
Potcar | None: The POTCAR from the specified path.
999+
Potcar | None: The POTCAR from the specified path or None if not found/no path specified.
10001000
"""
10011001

1002-
def get_potcar_in_path(p):
1003-
for fn in os.listdir(os.path.abspath(p)):
1004-
if fn.startswith("POTCAR") and ".spec" not in fn:
1005-
pc = Potcar.from_file(os.path.join(p, fn))
1006-
if {d.header for d in pc} == set(self.potcar_symbols):
1007-
return pc
1008-
warnings.warn(f"No POTCAR file with matching TITEL fields was found in {os.path.abspath(p)}")
1002+
if not path:
10091003
return None
10101004

1011-
if isinstance(path, (str, Path)):
1012-
path = str(path)
1013-
if "POTCAR" in path:
1014-
potcar = Potcar.from_file(path)
1015-
if {d.TITEL for d in potcar} != set(self.potcar_symbols):
1016-
raise ValueError("Potcar TITELs do not match Vasprun")
1017-
else:
1018-
potcar = get_potcar_in_path(path)
1019-
elif isinstance(path, bool) and path:
1020-
potcar = get_potcar_in_path(os.path.split(self.filename)[0])
1005+
if isinstance(path, (str, Path)) and "POTCAR" in str(path):
1006+
potcar_paths = [str(path)]
10211007
else:
1022-
potcar = None
1008+
search_path = os.path.split(self.filename)[0] if path is True else str(path)
1009+
potcar_paths = [
1010+
f"{search_path}/{fn}" for fn in os.listdir(search_path) if fn.startswith("POTCAR") and ".spec" not in fn
1011+
]
10231012

1024-
return potcar
1013+
for potcar_path in potcar_paths:
1014+
try:
1015+
potcar = Potcar.from_file(potcar_path)
1016+
if {d.header for d in potcar} == set(self.potcar_symbols):
1017+
return potcar
1018+
except Exception:
1019+
continue
1020+
1021+
warnings.warn("No POTCAR file with matching TITEL fields was found in\n" + "\n ".join(potcar_paths))
1022+
1023+
return None
10251024

10261025
def get_trajectory(self):
10271026
"""

pymatgen/io/vasp/sets.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1270,7 +1270,9 @@ def __init__(
12701270
self._config_dict["INCAR"].pop("GGA", None)
12711271
if xc_functional.upper().endswith("+U"):
12721272
self._config_dict["INCAR"]["LDAU"] = True
1273-
default_potcars = self.CONFIG["PARENT"].replace("PBE", "PBE_").replace("BASE", "") # PBE64BASE -> PBE_64
1273+
1274+
default_potcars = self.CONFIG["PARENT"].upper()
1275+
default_potcars = default_potcars.replace("PBE", "PBE_").replace("BASE", "") # PBE64BASE -> PBE_64
12741276
user_potcar_functional = kwargs.get("user_potcar_functional", default_potcars)
12751277
if user_potcar_functional.upper() != default_potcars:
12761278
warnings.warn(

pymatgen/util/testing.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@
2626
MODULE_DIR = Path(__file__).absolute().parent
2727
STRUCTURES_DIR = MODULE_DIR / "structures"
2828
TEST_FILES_DIR = Path(SETTINGS.get("PMG_TEST_FILES_DIR", MODULE_DIR / ".." / ".." / "tests" / "files"))
29+
# fake POTCARs have original header information, meaning properties like number of electrons,
30+
# nuclear charge, core radii, etc. are unchanged (important for testing) while values of the
30+
# pseudopotential and kinetic energy corrections are scrambled to avoid VASP copyright infringement
32+
FAKE_POTCAR_DIR = TEST_FILES_DIR / "fake_potcars"
2933

3034

3135
class PymatgenTest(unittest.TestCase):

tests/alchemy/test_materials.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
SupercellTransformation,
1616
)
1717
from pymatgen.util.provenance import StructureNL
18-
from pymatgen.util.testing import TEST_FILES_DIR, PymatgenTest
18+
from pymatgen.util.testing import FAKE_POTCAR_DIR, TEST_FILES_DIR, PymatgenTest
1919

2020

2121
class TestTransformedStructure(PymatgenTest):
@@ -49,7 +49,7 @@ def test_append_filter(self):
4949
self.trans.append_filter(f3)
5050

5151
def test_get_vasp_input(self):
52-
SETTINGS["PMG_VASP_PSP_DIR"] = TEST_FILES_DIR
52+
SETTINGS["PMG_VASP_PSP_DIR"] = FAKE_POTCAR_DIR
5353
potcar = self.trans.get_vasp_input(MPRelaxSet)["POTCAR"]
5454
assert "\n".join(p.symbol for p in potcar) == "Na_pv\nFe_pv\nP\nO"
5555
assert len(self.trans.structures) == 2
-69.3 KB
Binary file not shown.
-69 KB
Binary file not shown.
-46.3 KB
Binary file not shown.
-67.6 KB
Binary file not shown.

0 commit comments

Comments
 (0)