Skip to content

Commit aabe729

Browse files
committed
Merge pull request #488 from moloney/enh-datafinder
Enh datafinder
2 parents 4461db2 + 306f786 commit aabe729

File tree

1 file changed

+151
-0
lines changed

1 file changed

+151
-0
lines changed

nipype/interfaces/io.py

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -550,6 +550,157 @@ def _list_outputs(self):
550550
outputs[key] = outputs[key][0]
551551
return outputs
552552

553+
class DataFinderInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec):
    """Input traits accepted by the DataFinder interface."""

    # Where to search: either a list of paths or a single path string.
    # Marked mandatory.
    root_paths = traits.Either(traits.List(), traits.Str(), mandatory=True)

    # Pattern a path has to match to be reported; defaults to '(.+)'.
    match_regex = traits.Str(
        '(.+)',
        usedefault=True,
        desc="Regular expression for matching paths.")

    # Patterns that exclude a path from the results.
    ignore_regexes = traits.List(
        desc=("List of regular expressions, if any match the path it will "
              "be ignored."))

    # Optional bounds on how far below the root paths the search goes.
    max_depth = traits.Int(
        desc="The maximum depth to search beneath the root_paths")
    min_depth = traits.Int(
        desc="The minimum depth to search beneath the root paths")

    # Off by default.
    unpack_single = traits.Bool(
        False,
        usedefault=True,
        desc="Unpack single results from list")
572+
573+
574+
class DataFinder(IOBase):
    r"""Search for paths that match a given regular expression.

    Allows a less prescriptive approach to gathering input files compared to
    DataGrabber. Will recursively search any subdirectories by default. This
    can be limited with the min/max depth options.

    Matched paths are available in the output 'out_paths'. Any named groups of
    captured text from the regular expression are also available as outputs of
    the same name.

    Examples
    --------

    >>> from nipype.interfaces.io import DataFinder

    Look for Nifti files in directories with "ep2d_fid" or "qT1" in the name,
    starting in the current directory.

    >>> df = DataFinder()
    >>> df.inputs.root_paths = '.'
    >>> df.inputs.match_regex = r'.+/(?P<series_dir>.+(qT1|ep2d_fid_T1).+)/(?P<basename>.+)\.nii.gz'
    >>> result = df.run() # doctest: +SKIP
    >>> print(result.outputs.out_paths) # doctest: +SKIP
    ['./027-ep2d_fid_T1_Gd4/acquisition.nii.gz',
     './018-ep2d_fid_T1_Gd2/acquisition.nii.gz',
     './016-ep2d_fid_T1_Gd1/acquisition.nii.gz',
     './013-ep2d_fid_T1_pre/acquisition.nii.gz']

    >>> print(result.outputs.series_dir) # doctest: +SKIP
    ['027-ep2d_fid_T1_Gd4',
     '018-ep2d_fid_T1_Gd2',
     '016-ep2d_fid_T1_Gd1',
     '013-ep2d_fid_T1_pre']

    >>> print(result.outputs.basename) # doctest: +SKIP
    ['acquisition',
     'acquisition',
     'acquisition',
     'acquisition']

    """

    input_spec = DataFinderInputSpec
    output_spec = DynamicTraitedSpec
    # NOTE(review): presumably makes the engine re-run this interface instead
    # of reusing cached results -- confirm against IOBase / the node runner.
    _always_run = True

    def _match_path(self, target_path):
        """Record ``target_path`` in ``self.result`` if it passes the filters.

        The path is dropped when any pattern in ``self.ignore_regexes``
        matches it or when ``self.match_regex`` does not match it. Otherwise
        it is appended to ``self.result['out_paths']`` and the text captured
        by every named group is appended to the list stored under that
        group's name.

        Parameters
        ----------
        target_path : str
            The file or directory path to test.
        """
        # Check if we should ignore the path
        if any(ignore_re.search(target_path)
               for ignore_re in self.ignore_regexes):
            return

        # Check if we can match the path
        match = self.match_regex.search(target_path)
        if match is None:
            return
        match_dict = match.groupdict()

        # The result dict is created on the first hit; groupdict() has the
        # same keys for every match of one pattern, so the first match is
        # enough to set up all the per-group lists.
        if self.result is None:
            self.result = {'out_paths': []}
            for key in match_dict:
                self.result[key] = []

        self.result['out_paths'].append(target_path)
        for key, val in match_dict.items():
            self.result[key].append(val)

    def _run_interface(self, runtime):
        """Walk every root path and collect the matching paths.

        Compiles the regular expressions from the inputs, tests each root
        path (or, for directories, everything ``os.walk`` yields beneath it
        within the min/max depth limits) with ``_match_path`` and stores the
        outcome in ``self.result``.

        Parameters
        ----------
        runtime
            Passed through untouched and returned.

        Returns
        -------
        The ``runtime`` argument.

        Raises
        ------
        RuntimeError
            If no path matched at all.
        """
        # Prepare some of the inputs.
        # root_paths is either a list or a single string. Testing for "not a
        # list" instead of "is a str" also wraps unicode strings on Python 2,
        # which would otherwise be iterated one character at a time.
        if not isinstance(self.inputs.root_paths, (list, tuple)):
            self.inputs.root_paths = [self.inputs.root_paths]
        self.match_regex = re.compile(self.inputs.match_regex)

        max_depth = self.inputs.max_depth
        if max_depth is Undefined:
            max_depth = None
        min_depth = self.inputs.min_depth
        if min_depth is Undefined:
            min_depth = 0

        if self.inputs.ignore_regexes is Undefined:
            self.ignore_regexes = []
        else:
            self.ignore_regexes = [re.compile(regex)
                                   for regex in self.inputs.ignore_regexes]

        self.result = None
        for root_path in self.inputs.root_paths:
            # Handle tilde/env variables and remove extra separators
            root_path = os.path.normpath(
                os.path.expandvars(os.path.expanduser(root_path)))

            # A root that is itself a file sits at depth 0 and has nothing
            # beneath it to walk.
            if os.path.isfile(root_path):
                if min_depth == 0:
                    self._match_path(root_path)
                continue

            # Walk through directory structure checking paths
            for curr_dir, sub_dirs, files in os.walk(root_path):
                # Depth = number of path components below root_path.
                # os.walk builds curr_dir by joining onto root_path, so the
                # prefix can simply be sliced off. Counting components this
                # way stays correct when root_path itself ends with a
                # separator (e.g. '/'), where comparing separator counts
                # would be off by one.
                below_root = curr_dir[len(root_path):]
                curr_depth = len([part for part in below_root.split(os.sep)
                                  if part])

                # If the max path depth has been reached, clear sub_dirs
                # (in place, so os.walk stops descending) and files
                if max_depth is not None and curr_depth >= max_depth:
                    sub_dirs[:] = []
                    files = []

                # Test the path for the curr_dir and all files; the files
                # live one level deeper than the directory containing them.
                if curr_depth >= min_depth:
                    self._match_path(curr_dir)
                if curr_depth >= (min_depth - 1):
                    for infile in files:
                        self._match_path(os.path.join(curr_dir, infile))

        # Fail with a clear message instead of tripping over result=None
        # further down (or later in _list_outputs).
        if self.result is None:
            raise RuntimeError("Regular expression did not match any files!")

        if (self.inputs.unpack_single and
                len(self.result['out_paths']) == 1):
            for key, vals in list(self.result.items()):
                self.result[key] = vals[0]

        return runtime

    def _list_outputs(self):
        """Return the output dict updated with what the last run collected."""
        outputs = self._outputs().get()
        outputs.update(self.result)
        return outputs
553704

554705
class FSSourceInputSpec(BaseInterfaceInputSpec):
555706
subjects_dir = Directory(mandatory=True,

0 commit comments

Comments
 (0)