Skip to content

Commit c8ecd29

Browse files
authored
Merge pull request #195 from djarecka/fix/filelist_hash_recurs
hash for containers (closes #178)
2 parents f892ce0 + 068cf92 commit c8ecd29

File tree

8 files changed

+330
-16
lines changed

8 files changed

+330
-16
lines changed

pydra/engine/helpers.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import subprocess as sp
1111

1212
from .specs import Runtime, File, attr_fields
13+
from .helpers_file import is_existing_file, hash_file
1314

1415

1516
def ensure_list(obj, tuple2list=False):
@@ -396,6 +397,27 @@ def hash_function(obj):
396397
return sha256(str(obj).encode()).hexdigest()
397398

398399

400+
def hash_value(value, tp=None, metadata=None):
401+
"""calculating hash or returning values recursively"""
402+
if isinstance(value, (tuple, list)):
403+
return [hash_value(el, tp, metadata) for el in value]
404+
elif isinstance(value, dict):
405+
dict_hash = {k: hash_value(v, tp, metadata) for (k, v) in value.items()}
406+
# returning a sorted object
407+
return sorted(dict_hash.items(), key=lambda x: x[0])
408+
else: # not a container
409+
if (
410+
(tp is File or "pydra.engine.specs.File" in str(tp))
411+
and is_existing_file(value)
412+
and "container_path" not in metadata
413+
):
414+
return hash_file(value)
415+
elif isinstance(value, tuple):
416+
return list(value)
417+
else:
418+
return value
419+
420+
399421
def output_names_from_inputfields(inputs):
400422
"""
401423
Collect outputs from input fields with output_file_template.

pydra/engine/helpers_file.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -110,11 +110,6 @@ def fname_presuffix(fname, prefix="", suffix="", newpath=None, use_ext=True):
110110
def hash_file(afile, chunk_len=8192, crypto=sha256, raise_notfound=True):
111111
"""Compute hash of a file using 'crypto' module."""
112112
from .specs import LazyField
113-
from .helpers import hash_function
114-
115-
# adding option for tasks with splitter over list of files
116-
if isinstance(afile, list):
117-
return hash_function([hash_file(el) for el in afile])
118113

119114
if afile is None or isinstance(afile, LazyField) or isinstance(afile, list):
120115
return None
@@ -517,3 +512,11 @@ def is_local_file(f):
517512
from .specs import File
518513

519514
return f.type is File and "container_path" not in f.metadata
515+
516+
517+
def is_existing_file(f):
518+
""" checking if an object is an existing file"""
519+
try:
520+
return Path(f).exists()
521+
except TypeError:
522+
return False

pydra/engine/specs.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -39,21 +39,17 @@ def collect_additional_outputs(self, input_spec, inputs, output_dir):
3939
@property
4040
def hash(self):
4141
"""Compute a basic hash for any given set of fields."""
42-
from .helpers import hash_function
43-
from .helpers_file import hash_file
42+
from .helpers import hash_value, hash_function
4443

4544
inp_dict = {}
4645
for field in attr_fields(self):
4746
if field.name in ["_graph_checksums", "bindings"] or field.metadata.get(
4847
"output_file_template"
4948
):
5049
continue
51-
if field.type == File and "container_path" not in field.metadata:
52-
inp_dict[field.name] = hash_file(getattr(self, field.name))
53-
elif isinstance(getattr(self, field.name), tuple):
54-
inp_dict[field.name] = list(getattr(self, field.name))
55-
else:
56-
inp_dict[field.name] = getattr(self, field.name)
50+
inp_dict[field.name] = hash_value(
51+
value=getattr(self, field.name), tp=field.type, metadata=field.metadata
52+
)
5753
inp_hash = hash_function(inp_dict)
5854
if hasattr(self, "_graph_checksums"):
5955
return hash_function((inp_hash, self._graph_checksums))

pydra/engine/task.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -391,7 +391,7 @@ def _run_task(self):
391391
args = self.command_args
392392
if args:
393393
# removing emty strings
394-
args = [el for el in args if el not in ["", " "]]
394+
args = [str(el) for el in args if el not in ["", " "]]
395395
keys = ["return_code", "stdout", "stderr"]
396396
values = execute(args, strip=self.strip)
397397
self.output_ = dict(zip(keys, values))

pydra/engine/tests/test_node_task.py

Lines changed: 93 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,15 @@
33
import numpy as np
44
import pytest
55

6-
from .utils import fun_addtwo, fun_addvar, moment, fun_div
6+
from .utils import (
7+
fun_addtwo,
8+
fun_addvar,
9+
moment,
10+
fun_div,
11+
fun_dict,
12+
fun_file,
13+
fun_file_list,
14+
)
715

816
from ..core import TaskBase
917
from ..submitter import Submitter
@@ -283,6 +291,32 @@ def test_task_init_6():
283291
assert nn.state.states_val == []
284292

285293

294+
@pytest.mark.parametrize("plugin", Plugins)
295+
def test_task_init_7(plugin, tmpdir):
296+
""" task with a dictionary of files as an input, checking checksum"""
297+
file1 = tmpdir.join("file1.txt")
298+
with open(file1, "w") as f:
299+
f.write("hello")
300+
301+
file2 = tmpdir.join("file2.txt")
302+
with open(file2, "w") as f:
303+
f.write("from pydra\n")
304+
305+
nn1 = fun_file_list(name="NA", filename_list=[file1, file2])
306+
output_dir1 = nn1.output_dir
307+
308+
# changing the content of the file
309+
file2 = tmpdir.join("file2.txt")
310+
with open(file2, "w") as f:
311+
f.write("from pydra")
312+
313+
nn2 = fun_file_list(name="NA", filename_list=[file1, file2])
314+
output_dir2 = nn2.output_dir
315+
316+
# the checksum should be different - content of file2 is different
317+
assert output_dir1.name != output_dir2.name
318+
319+
286320
def test_task_error():
287321
func = fun_div(name="div", a=1, b=0)
288322
with pytest.raises(ZeroDivisionError):
@@ -382,6 +416,64 @@ def test_task_nostate_2(plugin):
382416
assert nn.output_dir.exists()
383417

384418

419+
@pytest.mark.parametrize("plugin", Plugins)
420+
def test_task_nostate_3(plugin):
421+
""" task with a dictionary as an input"""
422+
nn = fun_dict(name="NA", d={"a": "ala", "b": "bala"})
423+
assert nn.inputs.d == {"a": "ala", "b": "bala"}
424+
425+
with Submitter(plugin=plugin) as sub:
426+
sub(nn)
427+
428+
# checking the results
429+
results = nn.result()
430+
assert results.output.out == "a:ala_b:bala"
431+
# checking the output_dir
432+
assert nn.output_dir.exists()
433+
434+
435+
@pytest.mark.parametrize("plugin", Plugins)
436+
def test_task_nostate_4(plugin, tmpdir):
437+
""" task with a dictionary as an input"""
438+
file1 = tmpdir.join("file.txt")
439+
with open(file1, "w") as f:
440+
f.write("hello from pydra\n")
441+
442+
nn = fun_file(name="NA", filename=file1)
443+
444+
with Submitter(plugin=plugin) as sub:
445+
sub(nn)
446+
447+
# checking the results
448+
results = nn.result()
449+
assert results.output.out == "hello from pydra\n"
450+
# checking the output_dir
451+
assert nn.output_dir.exists()
452+
453+
454+
@pytest.mark.parametrize("plugin", Plugins)
455+
def test_task_nostate_5(plugin, tmpdir):
456+
""" task with a dictionary of files as an input"""
457+
file1 = tmpdir.join("file1.txt")
458+
with open(file1, "w") as f:
459+
f.write("hello")
460+
461+
file2 = tmpdir.join("file2.txt")
462+
with open(file2, "w") as f:
463+
f.write("from pydra\n")
464+
465+
nn = fun_file_list(name="NA", filename_list=[file1, file2])
466+
467+
with Submitter(plugin=plugin) as sub:
468+
sub(nn)
469+
470+
# checking the results
471+
results = nn.result()
472+
assert results.output.out == "hello from pydra\n"
473+
# checking the output_dir
474+
assert nn.output_dir.exists()
475+
476+
385477
# Testing caching for tasks without states
386478

387479

pydra/engine/tests/test_shelltask.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -910,6 +910,48 @@ def test_shell_cmd_inputspec_7a(plugin, results_function):
910910
assert res.output.out1_changed.exists()
911911

912912

913+
@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter])
914+
@pytest.mark.parametrize("plugin", Plugins)
915+
def test_shell_cmd_inputspec_8(plugin, results_function, tmpdir):
916+
""" using input_spec, providing list of files as an input """
917+
918+
file_1 = tmpdir.join("file_1.txt")
919+
file_2 = tmpdir.join("file_2.txt")
920+
with open(file_1, "w") as f:
921+
f.write("hello ")
922+
with open(file_2, "w") as f:
923+
f.write("from boston")
924+
925+
cmd_exec = "cat"
926+
files_list = [file_1, file_2]
927+
928+
my_input_spec = SpecInfo(
929+
name="Input",
930+
fields=[
931+
(
932+
"files",
933+
attr.ib(
934+
type=ty.List[File],
935+
metadata={
936+
"position": 1,
937+
"help_string": "list of files",
938+
"mandatory": True,
939+
},
940+
),
941+
)
942+
],
943+
bases=(ShellSpec,),
944+
)
945+
946+
shelly = ShellCommandTask(
947+
name="shelly", executable=cmd_exec, files=files_list, input_spec=my_input_spec
948+
)
949+
950+
assert shelly.inputs.executable == cmd_exec
951+
res = results_function(shelly, plugin)
952+
assert res.output.stdout == "hello from boston"
953+
954+
913955
@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter])
914956
@pytest.mark.parametrize("plugin", Plugins)
915957
def test_shell_cmd_inputspec_copyfile_1(plugin, results_function, tmpdir):

0 commit comments

Comments
 (0)