Merge pull request #556 from rcali21/PROV-test

djarecka · web-flow · commit 64916f4e59b4 · 2022-09-09T11:33:18.000-04:00
WIP: adding task input info
diff --git a/pydra/engine/audit.py b/pydra/engine/audit.py
@@ -49,7 +49,15 @@ def start_audit(self, odir):
         self.odir = odir
         if self.audit_check(AuditFlag.PROV):
             self.aid = f"uid:{gen_uuid()}"
-            start_message = {"@id": self.aid, "@type": "task", "startedAtTime": now()}
+
+            user_id = f"uid:{gen_uuid()}"
+            start_message = {
+                "@id": self.aid,
+                "@type": "task",
+                "startedAtTime": now(),
+                "executedBy": user_id,
+            }
+
         os.chdir(self.odir)
         if self.audit_check(AuditFlag.PROV):
             self.audit_message(start_message, AuditFlag.PROV)
@@ -160,3 +168,24 @@ def audit_check(self, flag):
             Boolean AND for self.oudit_flags and flag
         """
         return self.audit_flags & flag
+
+    def audit_task(self, task):
+        """Send a PROV message with the task's label and command.
+
+        The command is ``None`` unless ``task.inputs`` has an ``executable``.
+        """
+        label = task.name
+        # no executable attribute -> assume function task
+        # TODO: work on changing this to function name
+        command = task.cmdline if hasattr(task.inputs, "executable") else None
+        task_message = {
+            "@id": self.aid,
+            "@type": "task",
+            "label": label,
+            "command": command,
+            "startedAtTime": now(),
+        }
+        self.audit_message(task_message, AuditFlag.PROV)
+
+        # add more fields according to BEP208 doc
+        # with every field, check in tests
diff --git a/pydra/engine/core.py b/pydra/engine/core.py
@@ -503,6 +503,8 @@ def _run(self, rerun=False, **kwargs):
             result = Result(output=None, runtime=None, errored=False)
             self.hooks.pre_run_task(self)
             self.audit.start_audit(odir=output_dir)
+            if self.audit.audit_check(AuditFlag.PROV):
+                self.audit.audit_task(task=self)
             try:
                 self.audit.monitor()
                 self._run_task()
diff --git a/pydra/engine/tests/test_task.py b/pydra/engine/tests/test_task.py
@@ -5,7 +5,8 @@
 import cloudpickle as cp
 from pathlib import Path
 import re
-
+import json
+import glob as glob
 from ... import mark
 from ..core import Workflow
 from ..task import AuditFlag, ShellCommandTask, DockerTask, SingularityTask
@@ -986,6 +987,60 @@ def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)]
     assert (tmpdir / funky.checksum / "messages.jsonld").exists()
 
 
+def test_audit_task(tmpdir):
+    """A PROV-audited function task emits a message labelled with its name."""
+    from glob import glob
+
+    @mark.task
+    def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)]):
+        return a + b
+
+    funky = testfunc(a=2, audit_flags=AuditFlag.PROV, messengers=FileMessenger())
+    funky.cache_dir = tmpdir
+    funky()
+    message_dir = tmpdir / funky.checksum / "messages"
+    # collect the label of every message that carries one
+    labels = []
+    for jsonld_file in glob(str(message_dir) + "/*.jsonld"):
+        with open(jsonld_file, "r") as fh:
+            message = json.load(fh)
+        if "label" in message:
+            labels.append(message["label"])
+    assert labels and all(label == "testfunc" for label in labels)
+
+
+def test_audit_shellcommandtask(tmpdir):
+    """A PROV-audited shell task records its label and command line."""
+    from glob import glob
+
+    args = "-l"
+    shelly = ShellCommandTask(
+        name="shelly",
+        executable="ls",
+        args=args,
+        audit_flags=AuditFlag.PROV,
+        messengers=FileMessenger(),
+    )
+    shelly.cache_dir = tmpdir
+    shelly()
+    message_path = tmpdir / shelly.checksum / "messages"
+
+    # go through each jsonld file in message_path and collect both fields
+    label_content = []
+    command_content = []
+    for file in glob(str(message_path) + "/*.jsonld"):
+        with open(file, "r") as f:
+            data = json.load(f)
+        if "label" in data:
+            label_content.append(data["label"])
+        if "command" in data:
+            command_content.append(data["command"])
+
+    assert label_content
+    # an empty list must fail: otherwise the command check is silently skipped
+    assert command_content and all(cmd == "ls -l" for cmd in command_content)
+
+
 def test_audit_prov_messdir_1(tmpdir, use_validator):
     """customized messenger dir"""
 
@@ -1082,7 +1137,7 @@ def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)]
     from glob import glob
 
     assert len(glob(str(tmpdir / funky.checksum / "proc*.log"))) == 1
-    assert len(glob(str(message_path / "*.jsonld"))) == 6
+    assert len(glob(str(message_path / "*.jsonld"))) == 7
 
     # commented out to speed up testing
     collect_messages(tmpdir / funky.checksum, message_path, ld_op="compact")