Skip to content

Commit 6f8c965

Browse files
tsalo and mgxd authored
ENH: Use BIDS URIs to track Sources in sidecars (#3255)
Closes #3252. This is just a first pass: it only modifies the RawSources for now. I can add the immediate Sources in another PR (this'll require a lot more effort).

## Changes proposed in this pull request

- Add `DatasetLinks` field to the `dataset_description.json`
  - The input dataset is called `raw`
  - Any other datasets supplied through `--derivatives` are automatically named `deriv-<index>`. We might want to support named derivatives at some point.
- Replace BIDS-relative paths in `RawSources` fields with BIDS-URIs.
- Change `RawSources` to `Sources` in the sidecar files, since `RawSources` is deprecated.

## Documentation that should be reviewed

I'll probably need to update the documentation, but haven't yet.

---------

Co-authored-by: Mathias Goncalves <[email protected]>
1 parent cdf7040 commit 6f8c965

File tree

8 files changed

+303
-37
lines changed

8 files changed

+303
-37
lines changed

fmriprep/cli/run.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,11 @@ def main():
222222
config.execution.run_uuid,
223223
session_list=session_list,
224224
)
225-
write_derivative_description(config.execution.bids_dir, config.execution.fmriprep_dir)
225+
write_derivative_description(
226+
config.execution.bids_dir,
227+
config.execution.fmriprep_dir,
228+
dataset_links=config.execution.dataset_links,
229+
)
226230
write_bidsignore(config.execution.fmriprep_dir)
227231

228232
if failed_reports:

fmriprep/config.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,8 @@ def load(cls, settings, init=True, ignore=None):
226226
if k in cls._paths:
227227
if isinstance(v, list | tuple):
228228
setattr(cls, k, [Path(val).absolute() for val in v])
229+
elif isinstance(v, dict):
230+
setattr(cls, k, {key: Path(val).absolute() for key, val in v.items()})
229231
else:
230232
setattr(cls, k, Path(v).absolute())
231233
elif hasattr(cls, k):
@@ -251,6 +253,8 @@ def get(cls):
251253
if k in cls._paths:
252254
if isinstance(v, list | tuple):
253255
v = [str(val) for val in v]
256+
elif isinstance(v, dict):
257+
v = {key: str(val) for key, val in v.items()}
254258
else:
255259
v = str(v)
256260
if isinstance(v, SpatialReferences):
@@ -439,6 +443,8 @@ class execution(_Config):
439443
"""Path to a working directory where intermediate results will be available."""
440444
write_graph = False
441445
"""Write out the computational graph corresponding to the planned preprocessing."""
446+
dataset_links = {}
447+
"""A dictionary of dataset links to be used to track Sources in sidecars."""
442448

443449
_layout = None
444450

@@ -454,6 +460,7 @@ class execution(_Config):
454460
'output_dir',
455461
'templateflow_home',
456462
'work_dir',
463+
'dataset_links',
457464
)
458465

459466
@classmethod
@@ -518,6 +525,11 @@ def _process_value(value):
518525
for k, v in filters.items():
519526
cls.bids_filters[acq][k] = _process_value(v)
520527

528+
dataset_links = {'raw': cls.bids_dir}
529+
for i_deriv, deriv_path in enumerate(cls.derivatives):
530+
dataset_links[f'deriv-{i_deriv}'] = deriv_path
531+
cls.dataset_links = dataset_links
532+
521533
if 'all' in cls.debug:
522534
cls.debug = list(DEBUG_MODES)
523535

fmriprep/interfaces/bids.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
"""BIDS-related interfaces."""
2+
3+
from pathlib import Path
4+
5+
from bids.utils import listify
6+
from nipype.interfaces.base import (
7+
DynamicTraitedSpec,
8+
SimpleInterface,
9+
TraitedSpec,
10+
isdefined,
11+
traits,
12+
)
13+
from nipype.interfaces.io import add_traits
14+
from nipype.interfaces.utility.base import _ravel
15+
16+
from ..utils.bids import _find_nearest_path
17+
18+
19+
class _BIDSURIInputSpec(DynamicTraitedSpec):
    """Static inputs of the ``BIDSURI`` interface.

    The spec is dynamic because the interface adds its ``in1`` ... ``inN``
    file inputs at construction time; only the two mandatory settings below
    are declared up front.
    """

    # Mapping of dataset name -> dataset root; each entry becomes a ``bids:<name>:`` prefix.
    dataset_links = traits.Dict(mandatory=True, desc='Dataset links')
    # Root of the output dataset; files below it get the ``bids::`` prefix.
    out_dir = traits.Str(mandatory=True, desc='Output directory')
22+
23+
24+
class _BIDSURIOutputSpec(TraitedSpec):
    """Outputs of the ``BIDSURI`` interface: one flat list of strings."""

    out = traits.List(traits.Str, desc='BIDS URI(s) for file')
29+
30+
31+
class BIDSURI(SimpleInterface):
    """Translate input file names into BIDS URIs using the dataset links.

    Any number of inputs (``in1``, ``in2``, ...) may be supplied; each can be a
    single file name or a (nested) list of them. All inputs are merged, in
    order, into one flat output list.
    """

    input_spec = _BIDSURIInputSpec
    output_spec = _BIDSURIOutputSpec

    def __init__(self, numinputs=0, **inputs):
        super().__init__(**inputs)
        self._numinputs = numinputs
        # The 1-based range is empty whenever numinputs < 1, in which case no
        # dynamic ``inN`` traits are registered.
        add_traits(self.inputs, [f'in{idx}' for idx in range(1, numinputs + 1)])

    def _run_interface(self, runtime):
        # Gather in1..inN in declaration order, then flatten nested lists.
        collected = [getattr(self.inputs, f'in{idx}') for idx in range(1, self._numinputs + 1)]
        flattened = _ravel(listify(collected))
        # Inputs that were never set are dropped.
        defined_files = [item for item in flattened if isdefined(item)]

        # Every dataset link becomes a 'bids:<name>:' prefix; the output
        # directory itself is addressed with the empty-name prefix 'bids::'.
        uri_roots = {
            f'bids:{name}:': Path(root) for name, root in self.inputs.dataset_links.items()
        }
        uri_roots['bids::'] = Path(self.inputs.out_dir)

        self._results['out'] = [_find_nearest_path(uri_roots, item) for item in defined_files]

        return runtime
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
"""Tests for fmriprep.interfaces.bids."""
2+
3+
4+
def test_BIDSURI():
    """Exercise BIDSURI with string inputs, list inputs, and mixes of both."""
    from fmriprep.interfaces.bids import BIDSURI

    links = {
        'raw': '/data',
        'deriv-0': '/data/derivatives/source-1',
    }
    fmriprep_dir = '/data/derivatives/fmriprep'

    raw_bold = '/data/sub-01/func/sub-01_task-rest_bold.nii.gz'
    deriv_bold = '/data/derivatives/source-1/sub-01/func/sub-01_task-rest_bold.nii.gz'
    unlinked_bold = '/out/sub-01/func/sub-01_task-rest_bold.nii.gz'
    boldref_uri = 'bids:raw:sub-01/func/sub-01_task-rest_boldref.nii.gz'

    raw_bold_uri = 'bids:raw:sub-01/func/sub-01_task-rest_bold.nii.gz'
    deriv_bold_uri = 'bids:deriv-0:sub-01/func/sub-01_task-rest_bold.nii.gz'

    # Each case: (values assigned to in1, in2, ... in order, expected ``out``)
    cases = [
        # A single element as a string
        ([raw_bold], [raw_bold_uri]),
        # A single element as a list
        ([[raw_bold]], [raw_bold_uri]),
        # Two inputs: a string and a list
        (
            [raw_bold, [deriv_bold, unlinked_bold]],
            [
                raw_bold_uri,
                deriv_bold_uri,
                unlinked_bold,  # No change
            ],
        ),
        # Two inputs as lists
        (
            [[raw_bold, boldref_uri], [deriv_bold, unlinked_bold]],
            [
                raw_bold_uri,
                boldref_uri,  # No change
                deriv_bold_uri,
                unlinked_bold,  # No change
            ],
        ),
    ]

    for in_values, expected in cases:
        interface = BIDSURI(
            numinputs=len(in_values),
            dataset_links=links,
            out_dir=fmriprep_dir,
        )
        for position, value in enumerate(in_values, start=1):
            setattr(interface.inputs, f'in{position}', value)
        results = interface.run()
        assert results.outputs.out == expected

fmriprep/utils/bids.py

Lines changed: 70 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ def write_bidsignore(deriv_dir):
9797
ignore_file.write_text('\n'.join(bids_ignore) + '\n')
9898

9999

100-
def write_derivative_description(bids_dir, deriv_dir):
100+
def write_derivative_description(bids_dir, deriv_dir, dataset_links=None):
101101
from .. import __version__
102102

103103
DOWNLOAD_URL = f'https://github.com/nipreps/fmriprep/archive/{__version__}.tar.gz'
@@ -145,6 +145,10 @@ def write_derivative_description(bids_dir, deriv_dir):
145145
if 'License' in orig_desc:
146146
desc['License'] = orig_desc['License']
147147

148+
# Add DatasetLinks
149+
if dataset_links:
150+
desc['DatasetLinks'] = {k: str(v) for k, v in dataset_links.items()}
151+
148152
Path.write_text(deriv_dir / 'dataset_description.json', json.dumps(desc, indent=4))
149153

150154

@@ -343,3 +347,68 @@ def dismiss_echo(entities=None):
343347
entities.append('echo')
344348

345349
return entities
350+
351+
352+
def _find_nearest_path(path_dict, input_path):
    """Find the nearest relative path from an input path to a dictionary of paths.

    If ``input_path`` is not relative to any of the paths in ``path_dict``,
    the absolute path string is returned.

    If ``input_path`` is already a BIDS-URI, then it will be returned unmodified.

    Parameters
    ----------
    path_dict : dict of (str, Path)
        A dictionary of paths. Keys are the URI prefixes (e.g. ``'bids:raw:'``)
        and values are the corresponding dataset roots.
    input_path : Path or str
        The input path to match.

    Returns
    -------
    matching_path : str
        The nearest relative path from the input path to a path in the dictionary.
        This is either the concatenation of the associated key from ``path_dict``
        and the relative path from the associated value from ``path_dict`` to ``input_path``,
        or the absolute path to ``input_path`` if no matching path is found from ``path_dict``.
        The relative part always uses forward slashes, whatever the platform.

    Examples
    --------
    >>> from pathlib import Path
    >>> path_dict = {
    ...     'bids::': Path('/data/derivatives/fmriprep'),
    ...     'bids:raw:': Path('/data'),
    ...     'bids:deriv-0:': Path('/data/derivatives/source-1'),
    ... }
    >>> input_path = Path('/data/derivatives/source-1/sub-01/func/sub-01_task-rest_bold.nii.gz')
    >>> _find_nearest_path(path_dict, input_path)  # match to 'bids:deriv-0:'
    'bids:deriv-0:sub-01/func/sub-01_task-rest_bold.nii.gz'
    >>> input_path = Path('/out/sub-01/func/sub-01_task-rest_bold.nii.gz')
    >>> _find_nearest_path(path_dict, input_path)  # no match- absolute path
    '/out/sub-01/func/sub-01_task-rest_bold.nii.gz'
    >>> input_path = Path('/data/sub-01/func/sub-01_task-rest_bold.nii.gz')
    >>> _find_nearest_path(path_dict, input_path)  # match to 'bids:raw:'
    'bids:raw:sub-01/func/sub-01_task-rest_bold.nii.gz'
    >>> input_path = 'bids::sub-01/func/sub-01_task-rest_bold.nii.gz'
    >>> _find_nearest_path(path_dict, input_path)  # already a BIDS-URI
    'bids::sub-01/func/sub-01_task-rest_bold.nii.gz'
    """
    # Don't modify BIDS-URIs
    if isinstance(input_path, str) and input_path.startswith('bids:'):
        return input_path

    input_path = Path(input_path)
    matching_key = None
    matching_path = None
    for key, path in path_dict.items():
        if not input_path.is_relative_to(path):
            continue

        relative_path = input_path.relative_to(path)
        # The dataset root closest to the file leaves the fewest remaining parts.
        # The strict '<' keeps the first candidate when two roots tie.
        if matching_path is None or len(relative_path.parts) < len(matching_path.parts):
            matching_key = key
            matching_path = relative_path

    if matching_path is None:
        # Not inside any known dataset: fall back to the plain absolute path
        return str(input_path.absolute())

    # BIDS URIs use forward slashes, so never emit OS-native separators
    return f'{matching_key}{matching_path.as_posix()}'

fmriprep/workflows/bold/base.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,8 @@ def init_bold_wf(
328328
workflow.connect([
329329
(bold_fit_wf, ds_bold_native_wf, [
330330
('outputnode.bold_mask', 'inputnode.bold_mask'),
331+
('outputnode.motion_xfm', 'inputnode.motion_xfm'),
332+
('outputnode.boldref2fmap_xfm', 'inputnode.boldref2fmap_xfm'),
331333
]),
332334
(bold_native_wf, ds_bold_native_wf, [
333335
('outputnode.bold_native', 'inputnode.bold'),

fmriprep/workflows/bold/fit.py

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,10 @@ def init_bold_fit_wf(
378378
# fmt:on
379379

380380
# Stage 1: Generate motion correction boldref
381+
hmc_boldref_source_buffer = pe.Node(
382+
niu.IdentityInterface(fields=['in_file']),
383+
name='hmc_boldref_source_buffer',
384+
)
381385
if not have_hmcref:
382386
config.loggers.workflow.info('Stage 1: Adding HMC boldref workflow')
383387
hmc_boldref_wf = init_raw_boldref_wf(
@@ -395,7 +399,6 @@ def init_bold_fit_wf(
395399
)
396400
ds_hmc_boldref_wf.inputs.inputnode.source_files = [bold_file]
397401

398-
# fmt:off
399402
workflow.connect([
400403
(hmc_boldref_wf, hmcref_buffer, [
401404
('outputnode.bold_file', 'bold_file'),
@@ -407,8 +410,10 @@ def init_bold_fit_wf(
407410
(hmc_boldref_wf, func_fit_reports_wf, [
408411
('outputnode.validation_report', 'inputnode.validation_report'),
409412
]),
410-
])
411-
# fmt:on
413+
(ds_hmc_boldref_wf, hmc_boldref_source_buffer, [
414+
('outputnode.boldref', 'in_file'),
415+
]),
416+
]) # fmt:skip
412417
else:
413418
config.loggers.workflow.info('Found HMC boldref - skipping Stage 1')
414419

@@ -417,12 +422,11 @@ def init_bold_fit_wf(
417422

418423
hmcref_buffer.inputs.boldref = precomputed['hmc_boldref']
419424

420-
# fmt:off
421425
workflow.connect([
422426
(validate_bold, hmcref_buffer, [('out_file', 'bold_file')]),
423427
(validate_bold, func_fit_reports_wf, [('out_report', 'inputnode.validation_report')]),
424-
])
425-
# fmt:on
428+
(hmcref_buffer, hmc_boldref_source_buffer, [('boldref', 'in_file')]),
429+
]) # fmt:skip
426430

427431
# Stage 2: Estimate head motion
428432
if not hmc_xforms:
@@ -437,20 +441,18 @@ def init_bold_fit_wf(
437441
)
438442
ds_hmc_wf.inputs.inputnode.source_files = [bold_file]
439443

440-
# fmt:off
441444
workflow.connect([
442445
(hmcref_buffer, bold_hmc_wf, [
443446
('boldref', 'inputnode.raw_ref_image'),
444447
('bold_file', 'inputnode.bold_file'),
445448
]),
446449
(bold_hmc_wf, ds_hmc_wf, [('outputnode.xforms', 'inputnode.xforms')]),
447450
(bold_hmc_wf, hmc_buffer, [
448-
('outputnode.xforms', 'hmc_xforms'),
449451
('outputnode.movpar_file', 'movpar_file'),
450452
('outputnode.rmsd_file', 'rmsd_file'),
451453
]),
452-
])
453-
# fmt:on
454+
(ds_hmc_wf, hmc_buffer, [('outputnode.xforms', 'hmc_xforms')]),
455+
]) # fmt:skip
454456
else:
455457
config.loggers.workflow.info('Found motion correction transforms - skipping Stage 2')
456458
hmc_buffer.inputs.hmc_xforms = hmc_xforms
@@ -471,15 +473,15 @@ def init_bold_fit_wf(
471473
name='ds_coreg_boldref_wf',
472474
)
473475

474-
# fmt:off
475476
workflow.connect([
476477
(hmcref_buffer, fmapref_buffer, [('boldref', 'boldref_files')]),
477478
(fmapref_buffer, enhance_boldref_wf, [('out', 'inputnode.in_file')]),
478-
(fmapref_buffer, ds_coreg_boldref_wf, [('out', 'inputnode.source_files')]),
479+
(hmc_boldref_source_buffer, ds_coreg_boldref_wf, [
480+
('in_file', 'inputnode.source_files'),
481+
]),
479482
(ds_coreg_boldref_wf, regref_buffer, [('outputnode.boldref', 'boldref')]),
480483
(fmapref_buffer, func_fit_reports_wf, [('out', 'inputnode.sdc_boldref')]),
481-
])
482-
# fmt:on
484+
]) # fmt:skip
483485

484486
if fieldmap_id:
485487
fmap_select = pe.Node(

0 commit comments

Comments
 (0)