Skip to content

Commit b7935a4

Browse files
committed
gmxapi-79 Safe management of session working directories
Document proposed user interface for filesystem artifacts.
1 parent b02b6e5 commit b7935a4

File tree

3 files changed

+86
-12
lines changed

3 files changed

+86
-12
lines changed

src/gmx/context.py

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -286,10 +286,20 @@ class ParallelArrayContext(object):
286286
... # rank = session.rank
287287
... # The local context object knows where it fits in the global array.
288288
... rank = context.rank
289-
... output_path = os.path.join(context.workdir_list[rank], 'traj.trr')
290-
... assert(os.path.exists(output_path))
291-
... print('Worker {} produced {}'.format(rank, output_path))
289+
... output = work[0]['traj.trr']
292290
...
291+
>>> output_path = str(output.extract())
292+
>>> assert(os.path.exists(output_path))
293+
294+
When the session is created to run the workflow, a uniquely named directory is created in the filesystem to be the
295+
session's working directory. This directory name is available in the attribute `context.path`. Each
296+
operation on each rank has its own subdirectory. In the example above, the directory for MD artifacts for each of
297+
the two ranks used can be accessed through `work.path[0]` and `work.path[1]`. Artifacts in each path can be accessed
298+
as dictionary keys. E.g. `work.path[0]['traj.trr']`.
299+
300+
Note that these attributes are proxy objects that may not exist at the time they are referenced with this syntax.
301+
To force the artifacts to be made available locally, use the `extract` method. The string representation of the
302+
returned object is a valid local absolute filename.
293303
294304
Implementation notes:
295305
@@ -479,7 +489,17 @@ def add_operation(self, namespace, operation, get_builder):
479489
def __load_tpr(self, element):
480490
"""Implement the gromacs.load_tpr operation.
481491
482-
Updates the minimum width of the workflow parallelism. Does not add any API object to the graph.
492+
File paths are taken to be relative to the session directory. Helper functions implemented for the Context
493+
should make sure to copy files into place or to ensure that the files are expected outputs of other operations.
494+
If the element has other elements listed in `depends` then the working directories of those elements are used
495+
to replace occurrences of the element names in the tpr filename arguments, using a forward slash (`/`) to separate
496+
the part of the string naming an element and the part of the string naming a relative file path.
497+
498+
Absolute filenames are not allowed, as they imply relation to an element named with a null string, which we
499+
would not want to respect even if it existed.
500+
501+
Updates the minimum width of the workflow parallelism. This operation is fused with the MD operation and does
502+
not add any API object to the graph.
483503
"""
484504
class Builder(object):
485505
def __init__(self, tpr_list):
@@ -578,17 +598,15 @@ def done():
578598
def __enter__(self):
579599
"""Implement Python context manager protocol, producing a Session for the specified work in this Context.
580600
601+
A session directory is created (if not yet present) with a unique key for the work specification. This prevents
602+
different work specifications from getting mixed in the same output directory. Each element in the work has its
603+
own subdirectory or subdirectories (one per worker) to hold artifacts and checkpoint information.
604+
581605
Returns:
582606
Session object that can be run and/or inspected.
583607
584608
Additional API operations are possible while the Session is active. When used as a Python context manager,
585609
the Context will close the Session at the end of the `with` block by calling `__exit__`.
586-
587-
Note: this is probably where we will have to process the work specification to determine whether we
588-
have appropriate resources (such as sufficiently wide parallelism). Until we have a better Session
589-
abstraction, this means the clean approach should take two passes to first build a DAG and then
590-
instantiate objects to perform the work. In the first implementation, we kind of muddle things into
591-
a single pass.
592610
"""
593611
import numpy
594612
try:

src/gmx/test/test_context.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,53 @@ def test_setting(self):
2828
mdargs.set(param)
2929
context = gmx.core.Context()
3030
context.setMDArgs(mdargs)
31+
32+
class PathManagementTestCase(unittest.TestCase):
33+
"""Test proper directory management for load_tpr and Session startup.
34+
35+
- [ ] Session should use working directory keyed by WorkSpec unique identifier.
36+
- [ ] Existing directory should not be corrupted.
37+
- [ ] Existing directory should be checked for state.
38+
- [ ] File inputs should be made accessible to the Session.
39+
- [ ] Filesystem artifacts from an element should be accessible by another element.
40+
- [ ] Filesystem artifacts should be made accessible to the client.
41+
"""
42+
# Use the harness features to set up a reusable temporary directory
43+
def setUp(self):
44+
return
45+
46+
def tearDown(self):
47+
return
48+
49+
def test_directory_creation(self):
50+
"""Check that the session launched but not run in setUp() got created."""
51+
return
52+
53+
def test_directory_safety(self):
54+
"""Check that the Session logic refuses to overwrite existing data."""
55+
return
56+
57+
class PathManagementTestCase(unittest.TestCase):
58+
"""Test proper directory management for load_tpr and Session startup.
59+
60+
- [ ] Session should use working directory keyed by WorkSpec unique identifier.
61+
- [ ] Existing directory should not be corrupted.
62+
- [ ] Existing directory should be checked for state.
63+
- [ ] File inputs should be made accessible to the Session.
64+
- [ ] Filesystem artifacts from an element should be accessible by another element.
65+
- [ ] Filesystem artifacts should be made accessible to the client.
66+
"""
67+
# Use the harness features to set up a reusable temporary directory
68+
def setUp(self):
69+
return
70+
71+
def tearDown(self):
72+
return
73+
74+
def test_directory_creation(self):
75+
"""Check that the session launched but not run in setUp() got created."""
76+
return
77+
78+
def test_directory_safety(self):
79+
"""Check that the Session logic refuses to overwrite existing data."""
80+
return

src/gmx/workflow.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -555,6 +555,13 @@ def get_source_elements(workspec):
555555
def from_tpr(input=None, **kwargs):
556556
"""Create a WorkSpec from a (list of) tpr file(s).
557557
558+
Absolute filenames are interpreted in reference to the local filesystems where the script is run, but the path is
559+
removed from the recorded work specification and the file is made available in the Session working directory.
560+
561+
Relative path names (or filenames without paths) are assumed to refer to files that already exist relative to the
562+
Session working directory. They are either outputs from other elements or must be put in place between session
563+
launch and session run. (See gmx.context)
564+
558565
Required Args:
559566
input: string or list of strings giving the filename(s) of simulation input
560567
@@ -573,7 +580,7 @@ def from_tpr(input=None, **kwargs):
573580
574581
Produces a WorkSpec with the following data.
575582
576-
version: "gmxapi_workspec_1_0"
583+
version: "gmxapi_workspec_0_1"
577584
elements:
578585
tpr_input:
579586
namespace: "gromacs"
@@ -604,7 +611,6 @@ def from_tpr(input=None, **kwargs):
604611
arg_path = os.path.abspath(arg)
605612
raise exceptions.UsageError(usage + " Got {}".format(arg_path))
606613

607-
# \todo These are runner parameters, not MD parameters, and should be in the call to gmx.run() instead of here.
608614
params = {}
609615
for arg_key in kwargs:
610616
if arg_key == 'grid' or arg_key == 'dd':

0 commit comments

Comments
 (0)