From 03f672b01fff430d4102e57757135e757e3d5fec Mon Sep 17 00:00:00 2001 From: Andrew-AbiMansour Date: Fri, 9 Apr 2021 12:03:45 -0400 Subject: [PATCH 01/11] execute supports not loading outfiles in memory --- qcengine/util.py | 45 +++++++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/qcengine/util.py b/qcengine/util.py index 45177fede..9bb635b05 100644 --- a/qcengine/util.py +++ b/qcengine/util.py @@ -389,6 +389,7 @@ def execute( environment: Optional[Dict[str, str]] = None, shell: Optional[bool] = False, exit_code: Optional[int] = 0, + outfiles_load: Optional[bool] = True, ) -> Tuple[bool, Dict[str, Any]]: """ Runs a process in the background until complete. @@ -428,7 +429,9 @@ def execute( Run command through the shell. exit_code: int, optional The exit code above which the process is considered failure. - + outfiles_load: bool, optional + Load output file(s) contents in outfiles. If set to False, + outfiles stores the posix path(s) instead. Raises ------ FileExistsError @@ -471,7 +474,7 @@ def execute( ) as scrdir: popen_kwargs["cwd"] = scrdir popen_kwargs["shell"] = shell - with disk_files(infiles, outfiles, cwd=scrdir, as_binary=as_binary) as extrafiles: + with disk_files(infiles, outfiles, cwd=scrdir, as_binary=as_binary, outfiles_load=outfiles_load) as extrafiles: with popen(command, popen_kwargs=popen_kwargs) as proc: # Wait for the subprocess to complete or the timeout to expire if interupt_after is None: @@ -562,6 +565,7 @@ def disk_files( *, cwd: Optional[str] = None, as_binary: Optional[List[str]] = None, + outfiles_load: Optional[bool] = True, ) -> Dict[str, Union[str, bytes]]: """Write and collect files. @@ -577,7 +581,9 @@ def disk_files( Directory to which to write and read files. as_binary : List[str] = None Keys in `infiles` (`outfiles`) to be written (read) as bytes, not decoded. - + outfiles_load: bool = True + Load output file(s) contents in outfiles. If set to False + outfiles stores the posix path(s) instead. Yields ------ Dict[str] = str @@ -604,21 +610,32 @@ def disk_files( finally: for fl in outfiles.keys(): - omode = "rb" if fl in as_binary else "r" - try: - filename = lwd / fl - with open(filename, omode) as fp: - outfiles[fl] = fp.read() - LOGGER.info(f"... Writing ({omode}): {filename}") - except (OSError, FileNotFoundError): + filename = lwd / fl + if outfiles_load: + omode = "rb" if fl in as_binary else "r" + try: + with open(filename, omode) as fp: + outfiles[fl] = fp.read() + LOGGER.info(f"... Writing ({omode}): {filename}") + except (OSError, FileNotFoundError): + if "*" in fl: + gfls = {} + for gfl in lwd.glob(fl): + with open(gfl, omode) as fp: + gfls[gfl.name] = fp.read() + LOGGER.info(f"... Writing ({omode}): {gfl}") + if not gfls: + gfls = None + outfiles[fl] = gfls + else: + outfiles[fl] = None + else: if "*" in fl: gfls = {} for gfl in lwd.glob(fl): - with open(gfl, omode) as fp: - gfls[gfl.name] = fp.read() - LOGGER.info(f"... Writing ({omode}): {gfl}") + gfls[gfl.name] = gfl if not gfls: gfls = None outfiles[fl] = gfls else: - outfiles[fl] = None + outfiles[fl] = filename From 01a7425a894f1994c748a0fb0be23ae3c5f825eb Mon Sep 17 00:00:00 2001 From: Andrew-AbiMansour Date: Mon, 12 Apr 2021 11:20:57 -0400 Subject: [PATCH 02/11] check for file existence when load_files=False --- qcengine/util.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/qcengine/util.py b/qcengine/util.py index 9bb635b05..6f1764b9c 100644 --- a/qcengine/util.py +++ b/qcengine/util.py @@ -431,7 +431,7 @@ def execute( The exit code above which the process is considered failure. outfiles_load: bool, optional Load output file(s) contents in outfiles. If set to False, - outfiles stores the posix path(s) instead. + outfiles stores the path(s) instead. Raises ------ FileExistsError @@ -583,7 +583,7 @@ def disk_files( Keys in `infiles` (`outfiles`) to be written (read) as bytes, not decoded. outfiles_load: bool = True Load output file(s) contents in outfiles. If set to False - outfiles stores the posix path(s) instead. + outfiles stores the path(s) instead. Yields ------ Dict[str] = str @@ -630,12 +630,15 @@ def disk_files( else: outfiles[fl] = None else: - if "*" in fl: + if filename.is_file(): + outfiles[fl] = filename + elif "*" in fl: gfls = {} for gfl in lwd.glob(fl): - gfls[gfl.name] = gfl + if gfl.is_file(): + gfls[gfl.name] = gfl if not gfls: gfls = None outfiles[fl] = gfls else: - outfiles[fl] = filename + outfiles[fl] = None From 335817ed9600398053c42b41244da4915eff046f Mon Sep 17 00:00:00 2001 From: Andrew-AbiMansour Date: Mon, 12 Apr 2021 11:21:45 -0400 Subject: [PATCH 03/11] parameterize test_disk_files for outfiles_load --- qcengine/tests/test_utils.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/qcengine/tests/test_utils.py b/qcengine/tests/test_utils.py index f1203da0b..38a5e6649 100644 --- a/qcengine/tests/test_utils.py +++ b/qcengine/tests/test_utils.py @@ -1,6 +1,7 @@ import os import sys import time +import pathlib import pytest from qcelemental.models import AtomicInput @@ -64,17 +65,22 @@ def test_tmpdir(): assert str(tmpdir).split(os.path.sep)[-1].endswith("this") -def test_disk_files(): +@pytest.mark.parametrize("outfiles_load", [True, False]) +def test_disk_files(outfiles_load): infiles = {"thing1": "hello", "thing2": "world", "other": "everyone"} outfiles = {"thing*": None, "other": None} with util.temporary_directory(suffix="this") as tmpdir: - with util.disk_files(infiles=infiles, outfiles=outfiles, cwd=tmpdir): + with util.disk_files(infiles=infiles, outfiles=outfiles, cwd=tmpdir, outfiles_load=outfiles_load): pass assert outfiles.keys() == {"thing*", "other"} - assert outfiles["thing*"]["thing1"] == "hello" - assert outfiles["other"] == "everyone" + if outfiles_load: + assert outfiles["thing*"]["thing1"] == "hello" + assert outfiles["other"] == "everyone" + else: + assert isinstance(outfiles["thing*"]["thing1"], pathlib.PurePath) + assert isinstance(outfiles["other"], pathlib.PurePath) def test_popen_tee_output(capsys): From 2df24d3a0c6e4c322da2d2ae638d1d99729a105e Mon Sep 17 00:00:00 2001 From: Andrew-AbiMansour Date: Mon, 12 Apr 2021 14:19:22 -0400 Subject: [PATCH 04/11] improve type hints, info msgs --- qcengine/util.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/qcengine/util.py b/qcengine/util.py index 6f1764b9c..60d82ca2d 100644 --- a/qcengine/util.py +++ b/qcengine/util.py @@ -566,7 +566,7 @@ def disk_files( cwd: Optional[str] = None, as_binary: Optional[List[str]] = None, outfiles_load: Optional[bool] = True, -) -> Dict[str, Union[str, bytes]]: +) -> Dict[str, Union[str, bytes, Path]]: """Write and collect files. Parameters @@ -586,7 +586,7 @@ def disk_files( outfiles stores the path(s) instead. Yields ------ - Dict[str] = str + Dict[str, Union[str, bytes, Path]] outfiles with RHS filled in. """ @@ -616,14 +616,14 @@ def disk_files( try: with open(filename, omode) as fp: outfiles[fl] = fp.read() - LOGGER.info(f"... Writing ({omode}): {filename}") + LOGGER.info(f"... Reading ({omode}): {filename}") except (OSError, FileNotFoundError): if "*" in fl: gfls = {} for gfl in lwd.glob(fl): with open(gfl, omode) as fp: gfls[gfl.name] = fp.read() - LOGGER.info(f"... Writing ({omode}): {gfl}") + LOGGER.info(f"... Reading ({omode}): {gfl}") if not gfls: gfls = None outfiles[fl] = gfls From 132f5f869574394c1267f915cc76bc0f78b885a3 Mon Sep 17 00:00:00 2001 From: Andrew-AbiMansour Date: Tue, 4 May 2021 04:34:00 -0400 Subject: [PATCH 05/11] replace outfiles_load w/ outfiles_track, generalize test_disk_files --- qcengine/tests/test_utils.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/qcengine/tests/test_utils.py b/qcengine/tests/test_utils.py index 38a5e6649..a41873afc 100644 --- a/qcengine/tests/test_utils.py +++ b/qcengine/tests/test_utils.py @@ -65,22 +65,29 @@ def test_tmpdir(): assert str(tmpdir).split(os.path.sep)[-1].endswith("this") -@pytest.mark.parametrize("outfiles_load", [True, False]) -def test_disk_files(outfiles_load): +@pytest.mark.parametrize("outfiles_track", [[], ["thing*"], ["thing*", "other"]]) +def test_disk_files(outfiles_track): infiles = {"thing1": "hello", "thing2": "world", "other": "everyone"} outfiles = {"thing*": None, "other": None} with util.temporary_directory(suffix="this") as tmpdir: - with util.disk_files(infiles=infiles, outfiles=outfiles, cwd=tmpdir, outfiles_load=outfiles_load): + with util.disk_files(infiles=infiles, outfiles=outfiles, cwd=tmpdir, outfiles_track=outfiles_track): pass assert outfiles.keys() == {"thing*", "other"} - if outfiles_load: - assert outfiles["thing*"]["thing1"] == "hello" - assert outfiles["other"] == "everyone" - else: - assert isinstance(outfiles["thing*"]["thing1"], pathlib.PurePath) - assert isinstance(outfiles["other"], pathlib.PurePath) + for ofile, ofile_val in outfiles.items(): + if isinstance(ofile_val, dict): + if ofile in outfiles_track: + for fpath in ofile_val.values(): + assert isinstance(fpath, pathlib.PurePath) + else: + for key in ofile_val.keys(): + print(key) + assert ofile_val[key] == infiles[key] + elif ofile in outfiles_track: + assert isinstance(ofile_val, pathlib.PurePath) + else: + assert ofile_val == infiles[ofile] def test_popen_tee_output(capsys): From f66a88b35a359420c68c49feac20f864c0bf5cbc Mon Sep 17 00:00:00 2001 From: Andrew-AbiMansour Date: Tue, 4 May 2021 04:42:30 -0400 Subject: [PATCH 06/11] enable tracking/reading of individual files --- qcengine/util.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/qcengine/util.py b/qcengine/util.py index 60d82ca2d..4d5660b98 100644 --- a/qcengine/util.py +++ b/qcengine/util.py @@ -377,6 +377,7 @@ def execute( infiles: Optional[Dict[str, str]] = None, outfiles: Optional[List[str]] = None, *, + outfiles_track: Optional[List[str]] = [], as_binary: Optional[List[str]] = None, scratch_name: Optional[str] = None, scratch_directory: Optional[str] = None, @@ -389,7 +390,6 @@ def execute( environment: Optional[Dict[str, str]] = None, shell: Optional[bool] = False, exit_code: Optional[int] = 0, - outfiles_load: Optional[bool] = True, ) -> Tuple[bool, Dict[str, Any]]: """ Runs a process in the background until complete. @@ -405,6 +405,11 @@ def execute( outfiles : List[str] = None Output file names to be collected after execution into values. May be {}. + outfiles_track: List[str], optional + Keys of `outfiles` to keep track of without loading their contents in memory. + For specified filename in `outfiles_track`, the file path instead of contents + is stored in `outfiles`. To ensure tracked files are not deleted after execution, + you must set `scratch_messy=True`. as_binary : List[str] = None Keys of `infiles` or `outfiles` to be treated as bytes. scratch_name : str, optional @@ -429,9 +434,6 @@ def execute( Run command through the shell. exit_code: int, optional The exit code above which the process is considered failure. - outfiles_load: bool, optional - Load output file(s) contents in outfiles. If set to False, - outfiles stores the path(s) instead. Raises ------ FileExistsError @@ -474,7 +476,9 @@ def execute( ) as scrdir: popen_kwargs["cwd"] = scrdir popen_kwargs["shell"] = shell - with disk_files(infiles, outfiles, cwd=scrdir, as_binary=as_binary, outfiles_load=outfiles_load) as extrafiles: + with disk_files( + infiles, outfiles, cwd=scrdir, as_binary=as_binary, outfiles_track=outfiles_track + ) as extrafiles: with popen(command, popen_kwargs=popen_kwargs) as proc: # Wait for the subprocess to complete or the timeout to expire if interupt_after is None: @@ -565,7 +569,7 @@ def disk_files( *, cwd: Optional[str] = None, as_binary: Optional[List[str]] = None, - outfiles_load: Optional[bool] = True, + outfiles_track: Optional[List[str]] = [], ) -> Dict[str, Union[str, bytes, Path]]: """Write and collect files. @@ -581,9 +585,10 @@ def disk_files( Directory to which to write and read files. as_binary : List[str] = None Keys in `infiles` (`outfiles`) to be written (read) as bytes, not decoded. - outfiles_load: bool = True - Load output file(s) contents in outfiles. If set to False - outfiles stores the path(s) instead. + outfiles_track: List[str], optional + Keys of `outfiles` to keep track of (i.e. file contents not loaded in memory). + For specified filename in `outfiles_track`, the file path instead of contents + is stored in `outfiles`. Yields ------ Dict[str, Union[str, bytes, Path]] @@ -611,7 +616,7 @@ def disk_files( finally: for fl in outfiles.keys(): filename = lwd / fl - if outfiles_load: + if fl not in outfiles_track: omode = "rb" if fl in as_binary else "r" try: with open(filename, omode) as fp: From 2a7806432e1724a06a8266f988d357c9f6351f6f Mon Sep 17 00:00:00 2001 From: Andrew-AbiMansour Date: Tue, 4 May 2021 14:07:32 -0400 Subject: [PATCH 07/11] reformat code with black --- qcengine/tests/test_cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qcengine/tests/test_cli.py b/qcengine/tests/test_cli.py index 83f786260..ad8884ab6 100644 --- a/qcengine/tests/test_cli.py +++ b/qcengine/tests/test_cli.py @@ -36,7 +36,7 @@ def run_qcengine_cli(args: List[str], stdin: str = None) -> str: def test_no_args(): - """ Test for qcengine with no arguments """ + """Test for qcengine with no arguments""" try: run_qcengine_cli([]) except subprocess.CalledProcessError as e: From 02b94322e5c7b656a72308af62ade31f2e4b8ebc Mon Sep 17 00:00:00 2001 From: Andrew-AbiMansour Date: Tue, 4 May 2021 14:20:48 -0400 Subject: [PATCH 08/11] replace travis with GHA CI badge --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ec3c46d57..be24b54c7 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ QCEngine ======== -[![Travis build](https://img.shields.io/travis/MolSSI/QCEngine/master.svg?logo=linux&logoColor=white)](https://travis-ci.org/MolSSI/QCEngine) +[![CI](https://github.com/MolSSI/QCEngine/actions/workflows/CI.yml/badge.svg)](https://github.com/MolSSI/QCEngine/actions/workflows/CI.yml) [![codecov](https://img.shields.io/codecov/c/github/MolSSI/QCEngine.svg?logo=Codecov&logoColor=white)](https://codecov.io/gh/MolSSI/QCEngine) [![Language grade: Python](https://img.shields.io/lgtm/grade/python/g/MolSSI/QCEngine.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/MolSSI/QCEngine/context:python) [![Documentation Status](https://readthedocs.org/projects/qcengine/badge/?version=latest)](https://qcengine.readthedocs.io/en/latest/?badge=latest) From 4a3e389d4b51b2a9e96274a6e171d6a2d0a1cef0 Mon Sep 17 00:00:00 2001 From: Andrew-AbiMansour Date: Tue, 4 May 2021 15:03:55 -0400 Subject: [PATCH 09/11] remove print statement --- qcengine/tests/test_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/qcengine/tests/test_utils.py b/qcengine/tests/test_utils.py index a41873afc..38a6440f4 100644 --- a/qcengine/tests/test_utils.py +++ b/qcengine/tests/test_utils.py @@ -82,7 +82,6 @@ def test_disk_files(outfiles_track): assert isinstance(fpath, pathlib.PurePath) else: for key in ofile_val.keys(): - print(key) assert ofile_val[key] == infiles[key] elif ofile in outfiles_track: assert isinstance(ofile_val, pathlib.PurePath) From 3308c0aacc6ab175effba86ea9993aa33a721fa4 Mon Sep 17 00:00:00 2001 From: Andrew-AbiMansour Date: Sat, 8 May 2021 00:53:20 -0400 Subject: [PATCH 10/11] outfiles_track is None by def --- qcengine/util.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/qcengine/util.py b/qcengine/util.py index 4d5660b98..9d8db5110 100644 --- a/qcengine/util.py +++ b/qcengine/util.py @@ -377,7 +377,7 @@ def execute( infiles: Optional[Dict[str, str]] = None, outfiles: Optional[List[str]] = None, *, - outfiles_track: Optional[List[str]] = [], + outfiles_track: Optional[List[str]] = None, as_binary: Optional[List[str]] = None, scratch_name: Optional[str] = None, scratch_directory: Optional[str] = None, @@ -569,7 +569,7 @@ def disk_files( *, cwd: Optional[str] = None, as_binary: Optional[List[str]] = None, - outfiles_track: Optional[List[str]] = [], + outfiles_track: Optional[List[str]] = None, ) -> Dict[str, Union[str, bytes, Path]]: """Write and collect files. @@ -603,6 +603,8 @@ def disk_files( as_binary = [] assert set(as_binary) <= (set(infiles) | set(outfiles)) + outfiles_track = outfiles_track or [] + try: for fl, content in infiles.items(): omode = "wb" if fl in as_binary else "w" From a3d0ace2a6034e8993264b5e5ce25916370998e8 Mon Sep 17 00:00:00 2001 From: Andrew-AbiMansour Date: Wed, 19 May 2021 13:53:21 -0400 Subject: [PATCH 11/11] support '*' for outfiles_track --- qcengine/util.py | 39 ++++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/qcengine/util.py b/qcengine/util.py index 9d8db5110..0cf09f146 100644 --- a/qcengine/util.py +++ b/qcengine/util.py @@ -616,34 +616,31 @@ def disk_files( yield outfiles finally: + outfiles_track = [ + fpath.name if "*" in track else track for track in outfiles_track for fpath in lwd.glob(track) + ] for fl in outfiles.keys(): filename = lwd / fl - if fl not in outfiles_track: - omode = "rb" if fl in as_binary else "r" - try: - with open(filename, omode) as fp: + omode = "rb" if fl in as_binary else "r" + try: + with open(filename, omode) as fp: + if fl not in outfiles_track: outfiles[fl] = fp.read() LOGGER.info(f"... Reading ({omode}): {filename}") - except (OSError, FileNotFoundError): - if "*" in fl: - gfls = {} - for gfl in lwd.glob(fl): - with open(gfl, omode) as fp: - gfls[gfl.name] = fp.read() - LOGGER.info(f"... Reading ({omode}): {gfl}") - if not gfls: - gfls = None - outfiles[fl] = gfls else: - outfiles[fl] = None - else: - if filename.is_file(): - outfiles[fl] = filename - elif "*" in fl: + outfiles[fl] = filename + LOGGER.info(f"... Tracking: {filename}") + except (OSError, FileNotFoundError): + if "*" in fl: gfls = {} for gfl in lwd.glob(fl): - if gfl.is_file(): - gfls[gfl.name] = gfl + with open(gfl, omode) as fp: + if gfl.name not in outfiles_track: + gfls[gfl.name] = fp.read() + LOGGER.info(f"... Reading ({omode}): {gfl}") + else: + gfls[gfl.name] = gfl + LOGGER.info(f"... Tracking: {gfl}") if not gfls: gfls = None outfiles[fl] = gfls