Track AtLocation paths and digests for all file inputs in a dict; add tests

Ryan Cali · Ryan Cali · commit dfecb8a66666 · 2022-11-22T12:52:33.000-08:00
diff --git a/pydra/engine/audit.py b/pydra/engine/audit.py
@@ -176,19 +176,25 @@ def audit_task(self, task):
         label = task.name
         entity_label = type(label)
 
-        command = task.cmdline if hasattr(task.inputs, "executable") else None
+        if hasattr(task.inputs, "executable"):
+            command = task.cmdline
+        # assume function task
+        else:
+            command = None
+
+        path_hash_dict = {}
 
         attr_list = attr_fields(task.inputs)
         for attrs in attr_list:
             if attrs.type in [File, Directory]:
                 input_name = attrs.name
                 input_path = os.path.abspath(getattr(task.inputs, input_name))
                 file_hash = hash_file(input_path)
+                path_hash_dict[input_path] = file_hash
 
-            else:
-                input_name = attrs.name
-                input_path = None
-                file_hash = None
+        # collect the input file paths and their corresponding hashes
+        input_paths = list(path_hash_dict.keys())
+        input_paths_hash = list(path_hash_dict.values())
 
         if command is not None:
             cmd_name = command.split()[0]
@@ -218,13 +224,16 @@ def audit_task(self, task):
         }
         entity_id = f"uid:{gen_uuid()}"
         entity_message = {
-            "@id": entity_id,
+            "@id": entity_id, 
             "Label": print(entity_label),
-            "AtLocation": input_path,
-            "GeneratedBy": "test",
+            "AtLocation": input_paths, #
+            "GeneratedBy": "test",  
             "@type": "input",
-            "digest": file_hash,
+            "digest": input_paths_hash  
         }
 
+      
+
         self.audit_message(start_message, AuditFlag.PROV)
         self.audit_message(entity_message, AuditFlag.PROV)
+
diff --git a/pydra/engine/tests/test_task.py b/pydra/engine/tests/test_task.py
@@ -1008,8 +1008,6 @@ def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)]
     funky()
     message_path = tmpdir / funky.checksum / "messages"
     print(message_path)
-    # go through each jsonld file in message_path and check if the label field exists
-    json_content = []
 
     for file in glob(str(message_path) + "/*.jsonld"):
         with open(file, "r") as f:
@@ -1023,7 +1021,7 @@ def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)]
                     assert None == data["Label"]
                     # placeholder for atlocation until
                     # new test is added
-                    assert None == data["AtLocation"]
+                    assert [] == data["AtLocation"]
 
                 # assert data["Type"] == "input"
 
@@ -1072,13 +1070,19 @@ def test_audit_shellcommandtask(tmpdir):
 
 
 def test_audit_shellcommandtask_file(tmpdir):
+    import shutil
     # create test.txt file with "This is a test" in it in the tmpdir
     with open(tmpdir / "test.txt", "w") as f:
         f.write("This is a test.")
+    # make a copy of the test.txt file in the tmpdir and name it test2.txt
+    shutil.copy(tmpdir / "test.txt", tmpdir / "test2.txt")
+
 
     cmd = "cat"
     file_in = tmpdir / "test.txt"
+    file_in_2 = tmpdir / "test2.txt"
     test_file_hash = hash_file(file_in)
+    test_file_hash_2 = hash_file(file_in_2)
     my_input_spec = SpecInfo(
         name="Input",
         fields=[
@@ -1093,13 +1097,26 @@ def test_audit_shellcommandtask_file(tmpdir):
                         "mandatory": True,
                     },
                 ),
+            ),
+                        (
+                "in_file_2",
+                attr.ib(
+                    type=File,
+                    metadata={
+                        "position": 2,
+                        "argstr": "",
+                        "help_string": "text",
+                        "mandatory": True,
+                    },
+                ),
             )
         ],
         bases=(ShellSpec,),
     )
     shelly = ShellCommandTask(
         name="shelly",
         in_file=file_in,
+        in_file_2=file_in_2,
         input_spec=my_input_spec,
         executable=cmd,
         audit_flags=AuditFlag.PROV,
@@ -1113,9 +1130,9 @@ def test_audit_shellcommandtask_file(tmpdir):
             data = json.load(f)
             print(file_in)
             if "AtLocation" in data:
-                assert data["AtLocation"] == str(file_in)
+                assert data["AtLocation"] == [file_in, file_in_2]
             if "digest" in data:
-                assert test_file_hash == data["digest"]
+                assert data["digest"] == [test_file_hash, test_file_hash_2]
 
 
 def test_audit_shellcommandtask_version(tmpdir):