Skip to content

Commit 7cc19c3

Browse files
authored
Fix timeout in command_line.enumlib_caller.EnumlibAdaptor (#4276)
* add types * use timeout in communicate * enable timeout test * reduce timeout * fix types * increase timeout to reveal the issue * fix test command * avoid passing default args * avoid single letter var also avoid shadowing outside * send SIGKILL after timeout * update timeout test * compress test file * err msg with more details
1 parent fbf60a1 commit 7cc19c3

File tree

5 files changed

+104
-96
lines changed

5 files changed

+104
-96
lines changed

src/pymatgen/command_line/enumlib_caller.py

Lines changed: 89 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -8,19 +8,19 @@
88
99
If you use this module, please cite:
1010
11-
Gus L. W. Hart and Rodney W. Forcade, "Algorithm for generating derivative
12-
structures," Phys. Rev. B 77 224115 (26 June 2008)
11+
- Gus L. W. Hart and Rodney W. Forcade, "Algorithm for generating derivative
12+
structures," Phys. Rev. B 77 224115 (26 June 2008)
1313
14-
Gus L. W. Hart and Rodney W. Forcade, "Generating derivative structures from
15-
multilattices: Application to hcp alloys," Phys. Rev. B 80 014120 (July 2009)
14+
- Gus L. W. Hart and Rodney W. Forcade, "Generating derivative structures from
15+
multilattices: Application to hcp alloys," Phys. Rev. B 80 014120 (July 2009)
1616
17-
Gus L. W. Hart, Lance J. Nelson, and Rodney W. Forcade, "Generating
18-
derivative structures at a fixed concentration," Comp. Mat. Sci. 59
19-
101-107 (March 2012)
17+
- Gus L. W. Hart, Lance J. Nelson, and Rodney W. Forcade, "Generating
18+
derivative structures at a fixed concentration," Comp. Mat. Sci. 59
19+
101-107 (March 2012)
2020
21-
Wiley S. Morgan, Gus L. W. Hart, Rodney W. Forcade, "Generating derivative
22-
superstructures for systems with high configurational freedom," Comp. Mat.
23-
Sci. 136 144-149 (May 2017)
21+
- Wiley S. Morgan, Gus L. W. Hart, Rodney W. Forcade, "Generating derivative
22+
superstructures for systems with high configurational freedom," Comp. Mat.
23+
Sci. 136 144-149 (May 2017)
2424
"""
2525

2626
from __future__ import annotations
@@ -33,7 +33,7 @@
3333
import subprocess
3434
from glob import glob
3535
from shutil import which
36-
from threading import Timer
36+
from typing import TYPE_CHECKING
3737

3838
import numpy as np
3939
from monty.dev import requires
@@ -44,17 +44,19 @@
4444
from pymatgen.io.vasp.inputs import Poscar
4545
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
4646

47+
if TYPE_CHECKING:
48+
from typing import ClassVar
49+
4750
logger = logging.getLogger(__name__)
4851

49-
# Favor the use of the newer "enum.x" by Gus Hart instead of the older
50-
# "multienum.x"
51-
enum_cmd = which("enum.x") or which("multienum.x")
52-
# prefer makestr.x at present
53-
makestr_cmd = which("makestr.x") or which("makeStr.x") or which("makeStr.py")
52+
# Favor the use of the newer "enum.x" by Gus Hart over "multienum.x"
53+
ENUM_CMD = which("enum.x") or which("multienum.x")
54+
# Prefer makestr.x at present
55+
MAKESTR_CMD = which("makestr.x") or which("makeStr.x") or which("makeStr.py")
5456

5557

5658
@requires(
57-
enum_cmd and makestr_cmd,
59+
ENUM_CMD and MAKESTR_CMD,
5860
"EnumlibAdaptor requires the executables 'enum.x' or 'multienum.x' "
5961
"and 'makestr.x' or 'makeStr.py' to be in the path. Please download the "
6062
"library at https://github.com/msg-byu/enumlib and follow the instructions "
@@ -64,26 +66,26 @@ class EnumlibAdaptor:
6466
"""An adaptor for enumlib.
6567
6668
Attributes:
67-
structures (list): all enumerated structures.
69+
structures (list[Structure]): all enumerated structures.
6870
"""
6971

70-
amount_tol = 1e-5
72+
amount_tol: ClassVar[float] = 1e-5
7173

7274
def __init__(
7375
self,
74-
structure,
75-
min_cell_size=1,
76-
max_cell_size=1,
77-
symm_prec=0.1,
78-
enum_precision_parameter=0.001,
79-
refine_structure=False,
80-
check_ordered_symmetry=True,
81-
timeout=None,
82-
):
76+
structure: Structure,
77+
min_cell_size: int = 1,
78+
max_cell_size: int = 1,
79+
symm_prec: float = 0.1,
80+
enum_precision_parameter: float = 0.001,
81+
refine_structure: bool = False,
82+
check_ordered_symmetry: bool = True,
83+
timeout: float | None = None,
84+
) -> None:
8385
"""Initialize the adapter with a structure and some parameters.
8486
8587
Args:
86-
structure: An input structure.
88+
structure (Structure): An input structure.
8789
min_cell_size (int): The minimum cell size wanted. Defaults to 1.
8890
max_cell_size (int): The maximum cell size wanted. Defaults to 1.
8991
symm_prec (float): Symmetry precision. Defaults to 0.1.
@@ -117,61 +119,65 @@ def __init__(
117119
self.structure = finder.get_refined_structure()
118120
else:
119121
self.structure = structure
122+
120123
self.min_cell_size = min_cell_size
121124
self.max_cell_size = max_cell_size
122125
self.symm_prec = symm_prec
123126
self.enum_precision_parameter = enum_precision_parameter
124127
self.check_ordered_symmetry = check_ordered_symmetry
125128
self.timeout = timeout
126129

127-
def run(self):
130+
def run(self) -> None:
128131
"""Run the enumeration."""
129-
# Create a temporary directory for working.
132+
# Work in a temporary directory
130133
with ScratchDir(".") as tmp_dir:
131134
logger.debug(f"Temp dir : {tmp_dir}")
132135
# Generate input files
133136
self._gen_input_file()
137+
134138
# Perform the actual enumeration
135139
num_structs = self._run_multienum()
140+
136141
# Read in the enumeration output as structures.
137142
if num_structs > 0:
138143
self.structures = self._get_structures(num_structs)
139144
else:
140145
raise EnumError("Unable to enumerate structure.")
141146

142-
def _gen_input_file(self):
147+
def _gen_input_file(self) -> None:
143148
"""Generate the necessary struct_enum.in file for enumlib. See enumlib
144149
documentation for details.
145150
"""
146151
coord_format = "{:.6f} {:.6f} {:.6f}"
147-
# Using symmetry finder, get the symmetrically distinct sites.
152+
# Use symmetry finder to get the symmetrically distinct sites
148153
fitter = SpacegroupAnalyzer(self.structure, self.symm_prec)
149154
symmetrized_structure = fitter.get_symmetrized_structure()
150155
logger.debug(
151156
f"Spacegroup {fitter.get_space_group_symbol()} ({fitter.get_space_group_number()}) "
152157
f"with {len(symmetrized_structure.equivalent_sites)} distinct sites"
153158
)
154-
"""Enumlib doesn"t work when the number of species get too large. To
155-
simplify matters, we generate the input file only with disordered sites
156-
and exclude the ordered sites from the enumeration. The fact that
157-
different disordered sites with the exact same species may belong to
158-
different equivalent sites is dealt with by having determined the
159-
spacegroup earlier and labelling the species differently.
160-
"""
161-
# index_species and index_amounts store mappings between the indices
159+
160+
# Enumlib doesn"t work when the number of species get too large. To
161+
# simplify matters, we generate the input file only with disordered sites
162+
# and exclude the ordered sites from the enumeration. The fact that
163+
# different disordered sites with the exact same species may belong to
164+
# different equivalent sites is dealt with by having determined the
165+
# spacegroup earlier and labelling the species differently.
166+
167+
# `index_species` and `index_amounts` store mappings between the indices
162168
# used in the enum input file, and the actual species and amounts.
163169
index_species = []
164170
index_amounts = []
165171

166-
# Stores the ordered sites, which are not enumerated.
172+
# Store the ordered sites, which are not enumerated.
167173
ordered_sites = []
168174
disordered_sites = []
169175
coord_str = []
170176
for sites in symmetrized_structure.equivalent_sites:
171177
if sites[0].is_ordered:
172178
ordered_sites.append(sites)
173179
else:
174-
sp_label = []
180+
_sp_label: list[int] = []
175181
species = dict(sites[0].species.items())
176182
if sum(species.values()) < 1 - EnumlibAdaptor.amount_tol:
177183
# Let us first make add a dummy element for every single
@@ -180,42 +186,41 @@ def _gen_input_file(self):
180186
for sp, amt in species.items():
181187
if sp not in index_species:
182188
index_species.append(sp)
183-
sp_label.append(len(index_species) - 1)
189+
_sp_label.append(len(index_species) - 1)
184190
index_amounts.append(amt * len(sites))
185191
else:
186192
ind = index_species.index(sp)
187-
sp_label.append(ind)
193+
_sp_label.append(ind)
188194
index_amounts[ind] += amt * len(sites)
189-
sp_label = "/".join(f"{i}" for i in sorted(sp_label))
195+
sp_label: str = "/".join(f"{i}" for i in sorted(_sp_label))
190196
for site in sites:
191197
coord_str.append(f"{coord_format.format(*site.coords)} {sp_label}")
192198
disordered_sites.append(sites)
193199

194-
def get_sg_info(ss):
200+
def get_sg_number(ss) -> int:
195201
finder = SpacegroupAnalyzer(Structure.from_sites(ss), self.symm_prec)
196202
return finder.get_space_group_number()
197203

198-
target_sg_num = get_sg_info(list(symmetrized_structure))
204+
target_sg_num = get_sg_number(list(symmetrized_structure))
199205
curr_sites = list(itertools.chain.from_iterable(disordered_sites))
200-
sg_num = get_sg_info(curr_sites)
206+
sg_num = get_sg_number(curr_sites)
201207
ordered_sites = sorted(ordered_sites, key=len)
202208
logger.debug(f"Disordered sites has sg # {sg_num}")
203209
self.ordered_sites = []
204210

205-
# progressively add ordered sites to our disordered sites
211+
# Progressively add ordered sites to our disordered sites
206212
# until we match the symmetry of our input structure
207213
if self.check_ordered_symmetry:
208214
while sg_num != target_sg_num and len(ordered_sites) > 0:
209215
sites = ordered_sites.pop(0)
210216
temp_sites = list(curr_sites) + sites
211-
new_sg_num = get_sg_info(temp_sites)
217+
new_sg_num = get_sg_number(temp_sites)
212218
if sg_num != new_sg_num:
213219
logger.debug(f"Adding {sites[0].specie} in enum. New sg # {new_sg_num}")
214220
index_species.append(sites[0].specie)
215221
index_amounts.append(len(sites))
216-
sp_label = len(index_species) - 1
217222
for site in sites:
218-
coord_str.append(f"{coord_format.format(*site.coords)} {sp_label}")
223+
coord_str.append(f"{coord_format.format(*site.coords)} {len(index_species) - 1}")
219224
disordered_sites.append(sites)
220225
curr_sites = temp_sites
221226
sg_num = new_sg_num
@@ -274,29 +279,29 @@ def get_sg_info(ss):
274279
min_conc = math.floor(conc * base)
275280
output.append(f"{min_conc - 1} {min_conc + 1} {base}")
276281
output.append("")
282+
277283
logger.debug("Generated input file:\n" + "\n".join(output))
278284
with open("struct_enum.in", mode="w", encoding="utf-8") as file:
279285
file.write("\n".join(output))
280286

281-
def _run_multienum(self):
282-
with subprocess.Popen([enum_cmd], stdout=subprocess.PIPE, stdin=subprocess.PIPE, close_fds=True) as process:
283-
if self.timeout:
284-
timed_out = False
285-
timer = Timer(self.timeout * 60, lambda p: p.kill(), [process])
287+
def _run_multienum(self) -> int:
288+
"""Run enumlib to get multiple structure.
286289
287-
try:
288-
timer.start()
289-
output = process.communicate()[0].decode("utf-8")
290-
finally:
291-
if not timer.is_alive():
292-
timed_out = True
293-
timer.cancel()
290+
Returns:
291+
int: number of structures.
292+
"""
293+
if ENUM_CMD is None:
294+
raise RuntimeError("enumlib is not available")
294295

295-
if timed_out:
296-
raise TimeoutError("Enumeration took too long")
296+
with subprocess.Popen([ENUM_CMD], stdout=subprocess.PIPE, stdin=subprocess.PIPE, close_fds=True) as process:
297+
timeout = self.timeout * 60 if self.timeout is not None else None
297298

298-
else:
299-
output = process.communicate()[0].decode("utf-8")
299+
try:
300+
output = process.communicate(timeout=timeout)[0].decode("utf-8")
301+
except subprocess.TimeoutExpired as exc:
302+
process.kill()
303+
process.wait()
304+
raise TimeoutError(f"Enumeration took more than timeout {self.timeout} minutes") from exc
300305

301306
count = 0
302307
start_count = False
@@ -308,37 +313,41 @@ def _run_multienum(self):
308313
logger.debug(f"Enumeration resulted in {count} structures")
309314
return count
310315

311-
def _get_structures(self, num_structs):
312-
structs = []
316+
def _get_structures(self, num_structs: int) -> list[Structure]:
317+
if MAKESTR_CMD is None:
318+
raise RuntimeError("makestr.x is not available")
319+
320+
structs: list[Structure] = []
313321

314-
if ".py" in makestr_cmd:
315-
options = ["-input", "struct_enum.out", str(1), str(num_structs)]
322+
if ".py" in MAKESTR_CMD:
323+
options: tuple[str, ...] = ("-input", "struct_enum.out", str(1), str(num_structs))
316324
else:
317-
options = ["struct_enum.out", str(0), str(num_structs - 1)]
325+
options = ("struct_enum.out", str(0), str(num_structs - 1))
318326

319327
with subprocess.Popen(
320-
[makestr_cmd, *options],
328+
[MAKESTR_CMD, *options],
321329
stdout=subprocess.PIPE,
322330
stdin=subprocess.PIPE,
323331
close_fds=True,
324332
) as rs:
325333
_stdout, stderr = rs.communicate()
334+
326335
if stderr:
327336
logger.warning(stderr.decode())
328337

329-
# sites retrieved from enumlib will lack site properties
338+
# Sites retrieved from enumlib will lack site properties
330339
# to ensure consistency, we keep track of what site properties
331340
# are missing and set them to None
332341
# TODO: improve this by mapping ordered structure to original
333342
# disordered structure, and retrieving correct site properties
334-
disordered_site_properties = {}
343+
disordered_site_properties: dict = {}
335344

336345
if len(self.ordered_sites) > 0:
337346
original_latt = self.ordered_sites[0].lattice
338347
# Need to strip sites of site_properties, which would otherwise
339348
# result in an index error. Hence Structure is reconstructed in
340349
# the next step.
341-
site_properties = {}
350+
site_properties: dict = {}
342351
for site in self.ordered_sites:
343352
for k, v in site.properties.items():
344353
disordered_site_properties[k] = None
@@ -357,8 +366,8 @@ def _get_structures(self, num_structs):
357366
ordered_structure = inv_org_latt = None
358367

359368
for file in glob("vasp.*"):
360-
with open(file, encoding="utf-8") as file:
361-
data = file.read()
369+
with open(file, encoding="utf-8") as _file:
370+
data = _file.read()
362371
data = re.sub(r"scale factor", "1", data)
363372
data = re.sub(r"(\d+)-(\d+)", r"\1 -\2", data)
364373
poscar = Poscar.from_str(data, self.index_species)

0 commit comments

Comments
 (0)