Skip to content

Commit 9896735

Browse files
committed
Modified `_modify_inputs` so that it returns the actual original inputs rather than a deepcopy, because for some poorly behaved objects the hash of the deepcopy can be different
1 parent 6e91065 commit 9896735

File tree

3 files changed

+44
-11
lines changed

3 files changed

+44
-11
lines changed

pydra/engine/core.py

Lines changed: 29 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -464,13 +464,25 @@ def __call__(
464464
return res
465465

466466
def _modify_inputs(self):
467-
"""Update and preserve a Task's original inputs"""
467+
"""This method modifies the inputs of the task ahead of its execution:
468+
- links/copies upstream files and directories into the destination task's
469+
working directory as required select state array values corresponding to
470+
state index (it will try to leave them where they are unless specified or
471+
they are on different file systems)
472+
- resolve template values (e.g. output_file_template)
473+
- deepcopy all inputs to guard against in-place changes during the task's
474+
execution (they will be replaced after the task's execution with the
475+
original inputs to ensure the task's checksums are consistent)
476+
"""
468477
orig_inputs = {
469-
k: deepcopy(v) for k, v in attr.asdict(self.inputs, recurse=False).items()
478+
k: v
479+
for k, v in attr.asdict(self.inputs, recurse=False).items()
480+
if not k.startswith("_")
470481
}
471482
map_copyfiles = {}
472-
for fld in attr_fields(self.inputs):
473-
value = getattr(self.inputs, fld.name)
483+
input_fields = attr.fields(type(self.inputs))
484+
for name, value in orig_inputs.items():
485+
fld = getattr(input_fields, name)
474486
copy_mode, copy_collation = parse_copyfile(
475487
fld, default_collation=self.DEFAULT_COPY_COLLATION
476488
)
@@ -485,11 +497,21 @@ def _modify_inputs(self):
485497
supported_modes=self.SUPPORTED_COPY_MODES,
486498
)
487499
if value is not copied_value:
488-
map_copyfiles[fld.name] = copied_value
500+
map_copyfiles[name] = copied_value
489501
modified_inputs = template_update(
490502
self.inputs, self.output_dir, map_copyfiles=map_copyfiles
491503
)
492-
for name, value in modified_inputs.items():
504+
assert all(m in orig_inputs for m in modified_inputs), (
505+
"Modified inputs contain fields not present in original inputs. "
506+
"This is likely a bug."
507+
)
508+
for name, orig_value in orig_inputs.items():
509+
try:
510+
value = modified_inputs[name]
511+
except KeyError:
512+
# Ensure we pass a copy not the original just in case inner
513+
# attributes are modified during execution
514+
value = deepcopy(orig_value)
493515
setattr(self.inputs, name, value)
494516
return orig_inputs
495517

@@ -550,11 +572,9 @@ def _run(self, rerun=False, environment=None, **kwargs):
550572
save(output_dir, result=result, task=self)
551573
# removing the additional file with the checksum
552574
(self.cache_dir / f"{self.uid}_info.json").unlink()
553-
# # function etc. shouldn't change anyway, so removing
554575
# Restore original values to inputs
555576
for field_name, field_value in orig_inputs.items():
556-
if not field_name.startswith("_"):
557-
setattr(self.inputs, field_name, field_value)
577+
setattr(self.inputs, field_name, field_value)
558578
os.chdir(cwd)
559579
self.hooks.post_run(self, result)
560580
# Check for any changes to the input hashes that have occurred during the execution

pydra/engine/specs.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -985,8 +985,21 @@ def get_value(
985985
if result is None:
986986
raise RuntimeError(
987987
f"Could not find results of '{node.name}' node in a sub-directory "
988-
f"named '{node.checksum}' in any of the cache locations:\n"
988+
f"named '{node.checksum}' in any of the cache locations.\n"
989989
+ "\n".join(str(p) for p in set(node.cache_locations))
990+
+ f"\n\nThis is likely due to hash changes in '{self.name}' node inputs. "
991+
f"Current values and hashes: {self.inputs}, "
992+
f"{self.inputs._hashes}\n\n"
993+
"Set loglevel to 'debug' in order to track hash changes "
994+
"throughout the execution of the workflow.\n\n "
995+
"These issues may have been caused by `bytes_repr()` methods "
996+
"that don't return stable hash values for specific object "
997+
"types across multiple processes (see bytes_repr() "
998+
'"singledispatch "function in pydra/utils/hash.py).'
999+
"You may need to implement a specific `bytes_repr()` "
1000+
'"singledispatch overload"s or `__bytes_repr__()` '
1001+
"dunder methods to handle one or more types in "
1002+
"your interface inputs."
9901003
)
9911004
_, split_depth = TypeParser.strip_splits(self.type)
9921005

pydra/utils/hash.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ def hash_function(obj, cache=None):
6464
return hash_object(obj, cache=cache).hex()
6565

6666

67-
def hash_object(obj: object, cache: Cache | None = None) -> Hash:
67+
def hash_object(obj: object, cache: ty.Optional[Cache] = None) -> Hash:
6868
"""Hash an object
6969
7070
Constructs a byte string that uniquely identifies the object,

0 commit comments

Comments
 (0)