Skip to content
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
QCEngine
========
[![Travis build](https://img.shields.io/travis/MolSSI/QCEngine/master.svg?logo=linux&logoColor=white)](https://travis-ci.org/MolSSI/QCEngine)
[![CI](https://github.com/MolSSI/QCEngine/actions/workflows/CI.yml/badge.svg)](https://github.com/MolSSI/QCEngine/actions/workflows/CI.yml)
[![codecov](https://img.shields.io/codecov/c/github/MolSSI/QCEngine.svg?logo=Codecov&logoColor=white)](https://codecov.io/gh/MolSSI/QCEngine)
[![Language grade: Python](https://img.shields.io/lgtm/grade/python/g/MolSSI/QCEngine.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/MolSSI/QCEngine/context:python)
[![Documentation Status](https://readthedocs.org/projects/qcengine/badge/?version=latest)](https://qcengine.readthedocs.io/en/latest/?badge=latest)
Expand Down
2 changes: 1 addition & 1 deletion qcengine/tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def run_qcengine_cli(args: List[str], stdin: str = None) -> str:


def test_no_args():
""" Test for qcengine with no arguments """
"""Test for qcengine with no arguments"""
try:
run_qcengine_cli([])
except subprocess.CalledProcessError as e:
Expand Down
20 changes: 16 additions & 4 deletions qcengine/tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
import sys
import time
import pathlib

import pytest
from qcelemental.models import AtomicInput
Expand Down Expand Up @@ -64,17 +65,28 @@ def test_tmpdir():
assert str(tmpdir).split(os.path.sep)[-1].endswith("this")


def test_disk_files():
@pytest.mark.parametrize("outfiles_track", [[], ["thing*"], ["thing*", "other"]])
def test_disk_files(outfiles_track):

infiles = {"thing1": "hello", "thing2": "world", "other": "everyone"}
outfiles = {"thing*": None, "other": None}
with util.temporary_directory(suffix="this") as tmpdir:
with util.disk_files(infiles=infiles, outfiles=outfiles, cwd=tmpdir):
with util.disk_files(infiles=infiles, outfiles=outfiles, cwd=tmpdir, outfiles_track=outfiles_track):
pass

assert outfiles.keys() == {"thing*", "other"}
assert outfiles["thing*"]["thing1"] == "hello"
assert outfiles["other"] == "everyone"
for ofile, ofile_val in outfiles.items():
if isinstance(ofile_val, dict):
if ofile in outfiles_track:
for fpath in ofile_val.values():
assert isinstance(fpath, pathlib.PurePath)
else:
for key in ofile_val.keys():
assert ofile_val[key] == infiles[key]
elif ofile in outfiles_track:
assert isinstance(ofile_val, pathlib.PurePath)
else:
assert ofile_val == infiles[ofile]


def test_popen_tee_output(capsys):
Expand Down
44 changes: 34 additions & 10 deletions qcengine/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,7 @@ def execute(
infiles: Optional[Dict[str, str]] = None,
outfiles: Optional[List[str]] = None,
*,
outfiles_track: Optional[List[str]] = None,
as_binary: Optional[List[str]] = None,
scratch_name: Optional[str] = None,
scratch_directory: Optional[str] = None,
Expand Down Expand Up @@ -404,6 +405,11 @@ def execute(
outfiles : List[str] = None
Output file names to be collected after execution into
values. May be {}.
outfiles_track: List[str], optional
Keys of `outfiles` to keep track of without loading their contents in memory.
For specified filename in `outfiles_track`, the file path instead of contents
is stored in `outfiles`. To ensure tracked files are not deleted after execution,
you must set `scratch_messy=True`.
as_binary : List[str] = None
Keys of `infiles` or `outfiles` to be treated as bytes.
scratch_name : str, optional
Expand All @@ -428,7 +434,6 @@ def execute(
Run command through the shell.
exit_code: int, optional
The exit code above which the process is considered failure.

Raises
------
FileExistsError
Expand Down Expand Up @@ -471,7 +476,9 @@ def execute(
) as scrdir:
popen_kwargs["cwd"] = scrdir
popen_kwargs["shell"] = shell
with disk_files(infiles, outfiles, cwd=scrdir, as_binary=as_binary) as extrafiles:
with disk_files(
infiles, outfiles, cwd=scrdir, as_binary=as_binary, outfiles_track=outfiles_track
) as extrafiles:
with popen(command, popen_kwargs=popen_kwargs) as proc:
# Wait for the subprocess to complete or the timeout to expire
if interupt_after is None:
Expand Down Expand Up @@ -562,7 +569,8 @@ def disk_files(
*,
cwd: Optional[str] = None,
as_binary: Optional[List[str]] = None,
) -> Dict[str, Union[str, bytes]]:
outfiles_track: Optional[List[str]] = None,
) -> Dict[str, Union[str, bytes, Path]]:
"""Write and collect files.

Parameters
Expand All @@ -577,10 +585,13 @@ def disk_files(
Directory to which to write and read files.
as_binary : List[str] = None
Keys in `infiles` (`outfiles`) to be written (read) as bytes, not decoded.

outfiles_track: List[str], optional
Keys of `outfiles` to keep track of (i.e. file contents not loaded in memory).
For specified filename in `outfiles_track`, the file path instead of contents
is stored in `outfiles`.
Yields
------
Dict[str] = str
Dict[str, Union[str, bytes, Path]]
outfiles with RHS filled in.

"""
Expand All @@ -592,6 +603,8 @@ def disk_files(
as_binary = []
assert set(as_binary) <= (set(infiles) | set(outfiles))

outfiles_track = outfiles_track or []

try:
for fl, content in infiles.items():
omode = "wb" if fl in as_binary else "w"
Expand All @@ -603,20 +616,31 @@ def disk_files(
yield outfiles

finally:
outfiles_track = [
fpath.name if "*" in track else track for track in outfiles_track for fpath in lwd.glob(track)
]
for fl in outfiles.keys():
filename = lwd / fl
omode = "rb" if fl in as_binary else "r"
try:
filename = lwd / fl
with open(filename, omode) as fp:
outfiles[fl] = fp.read()
LOGGER.info(f"... Writing ({omode}): {filename}")
if fl not in outfiles_track:
outfiles[fl] = fp.read()
LOGGER.info(f"... Reading ({omode}): {filename}")
else:
outfiles[fl] = filename
LOGGER.info(f"... Tracking: {filename}")
except (OSError, FileNotFoundError):
if "*" in fl:
gfls = {}
for gfl in lwd.glob(fl):
with open(gfl, omode) as fp:
gfls[gfl.name] = fp.read()
LOGGER.info(f"... Writing ({omode}): {gfl}")
if gfl.name not in outfiles_track:
gfls[gfl.name] = fp.read()
LOGGER.info(f"... Reading ({omode}): {gfl}")
else:
gfls[gfl.name] = gfl
LOGGER.info(f"... Tracking: {gfl}")
if not gfls:
gfls = None
outfiles[fl] = gfls
Expand Down