Skip to content

Commit 056af08

Browse files
committed
[MAINT] Improving hashing of nodes
1 parent b5f8537 commit 056af08

File tree

1 file changed

+32
-17
lines changed

1 file changed

+32
-17
lines changed

nipype/pipeline/engine/nodes.py

Lines changed: 32 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -185,10 +185,11 @@ def __init__(self,
185185
'num_threads') and self._n_procs is not None:
186186
self._interface.inputs.num_threads = self._n_procs
187187

188-
# Initialize needed_outputs
189-
self.needed_outputs = []
190-
if needed_outputs:
191-
self.needed_outputs = sorted(needed_outputs)
188+
# Initialize needed_outputs and hashes
189+
self._hashvalue = None
190+
self._hashed_inputs = None
191+
self._needed_outputs = []
192+
self.needed_outputs = sorted(needed_outputs)
192193

193194
@property
194195
def interface(self):
@@ -210,6 +211,20 @@ def outputs(self):
210211
"""Return the output fields of the underlying interface"""
211212
return self._interface._outputs()
212213

214+
@property
def needed_outputs(self):
    """Return the sorted names of the outputs this node has to keep.

    A shallow copy of the stored list is returned.  Handing out the
    internal list itself would let callers change it in place (for
    example ``node.needed_outputs += [...]``, which mutates the list
    *before* the setter runs); the setter would then compare the new
    value against an already-modified list, see no difference, and
    leave a stale cached hash behind.
    """
    return list(self._needed_outputs)

@needed_outputs.setter
def needed_outputs(self, new_outputs):
    """Store the needed outputs (sorted) and reset the hash if they changed.

    ``new_outputs`` may be any iterable of mutually orderable items, or
    a falsy value such as ``None``, which is stored as an empty list.
    The cached ``_hashvalue`` / ``_hashed_inputs`` are cleared only when
    the sorted value differs from the one currently stored.
    """
    new_outputs = sorted(new_outputs or [])
    if new_outputs != self._needed_outputs:
        # Reset hash: needed outputs take part in the node hash, so the
        # cached values are no longer valid.
        self._hashvalue = None
        self._hashed_inputs = None
        self._needed_outputs = new_outputs
227+
213228
@property
214229
def mem_gb(self):
215230
"""Get estimated memory (GB)"""
@@ -387,8 +402,8 @@ def run(self, updatehash=False):
387402
logger.info('[Node] Setting-up "%s" in "%s".', self.fullname, outdir)
388403
hash_info = self.hash_exists(updatehash=updatehash)
389404
hash_exists, hashvalue, hashfile, hashed_inputs = hash_info
390-
force_run = self.overwrite or (self.overwrite is None
391-
and self._interface.always_run)
405+
force_run = self.overwrite or (self.overwrite is None and
406+
self._interface.always_run)
392407

393408
# If the node is cached, check on pklz files and finish
394409
if hash_exists and (updatehash or not force_run):
@@ -479,17 +494,17 @@ def run(self, updatehash=False):
479494
# NOTE: this is a diff hunk, not runnable code. Lines following an "NNN-"
# marker are the body removed by the commit; lines following an "NNN+"
# marker are its replacement.
def _get_hashval(self):
480495
"""Return a hash of the input state"""
481496
self._get_inputs()
482-
# Removed version: the hash was recomputed from self.inputs on every call
# and kept only in local variables.
hashed_inputs, hashvalue = self.inputs.get_hashval(
483-
hash_method=self.config['execution']['hash_method'])
484-
rm_extra = self.config['execution']['remove_unnecessary_outputs']
485-
if str2bool(rm_extra) and self.needed_outputs:
486-
hashobject = md5()
487-
hashobject.update(hashvalue.encode())
488-
# Removed version re-sorted needed_outputs on each call.
sorted_outputs = sorted(self.needed_outputs)
489-
hashobject.update(str(sorted_outputs).encode())
490-
hashvalue = hashobject.hexdigest()
491-
hashed_inputs.append(('needed_outputs', sorted_outputs))
492-
return hashed_inputs, hashvalue
497+
# Added version: compute only while both cached fields are None; otherwise
# the values stored on the instance are returned unchanged.
if self._hashvalue is None and self._hashed_inputs is None:
498+
self._hashed_inputs, self._hashvalue = self.inputs.get_hashval(
499+
hash_method=self.config['execution']['hash_method'])
500+
rm_extra = self.config['execution']['remove_unnecessary_outputs']
501+
# Fold needed_outputs into the digest only when the
# 'remove_unnecessary_outputs' option is true and the list is non-empty.
if str2bool(rm_extra) and self.needed_outputs:
502+
hashobject = md5()
503+
hashobject.update(self._hashvalue.encode())
504+
# No re-sort here. NOTE(review): this relies on the needed_outputs setter
# having stored an already sorted list - confirm nothing mutates it in place.
hashobject.update(str(self.needed_outputs).encode())
505+
self._hashvalue = hashobject.hexdigest()
506+
self._hashed_inputs.append(('needed_outputs', self.needed_outputs))
507+
# Returns the pair (hashed_inputs, hashvalue) held on the instance.
return self._hashed_inputs, self._hashvalue
493508

494509
def _get_inputs(self):
495510
"""Retrieve inputs from pointers to results file

0 commit comments

Comments
 (0)