-
Notifications
You must be signed in to change notification settings - Fork 129
[WIP] introduce BIDSLayoutV2 #863
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
erdalkaraca
wants to merge
8
commits into
bids-standard:main
Choose a base branch
from
ANCPLabOldenburg:master
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from 2 commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
9378c8f
new BIDSLayoutV2 interface to be used in parallel with legacy BIDSLayout
erdalkaraca 80ccdfb
Merge branch 'bids-standard:master' into master
erdalkaraca 8fe9ad7
Update bids/layout/__init__.py
erdalkaraca 7b4881d
get_<entity>() with fuzzy matching entity name
erdalkaraca 324bd9d
Merge branch 'bids-standard:master' into master
erdalkaraca c1a5a98
WIP: unit tests stabilization, added missing functionality
erdalkaraca f50f657
WIP: return int instead of str for index values of entities (for exam…
erdalkaraca 7f08cdb
Merge branch 'bids-standard:master' into master
erdalkaraca File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,364 @@ | ||
import difflib | ||
import os.path | ||
from collections import OrderedDict | ||
from functools import partial | ||
from pathlib import Path | ||
from typing import List, Union, Dict | ||
|
||
from .utils import BIDSMetadata | ||
from ..exceptions import ( | ||
BIDSEntityError, | ||
BIDSValidationError, | ||
NoMatchError, | ||
TargetError, | ||
) | ||
|
||
from ancpbids import CustomOpExpr, EntityExpr, AllExpr, ValidationPlugin, load_dataset, validate_dataset, \ | ||
write_derivative | ||
from ancpbids.query import query, query_entities, FnMatchExpr, AnyExpr | ||
from ancpbids.utils import deepupdate, resolve_segments, convert_to_relative | ||
|
||
__all__ = ['BIDSLayoutV2'] | ||
|
||
class BIDSLayoutMRIMixin:
    """MRI-specific convenience queries for layout classes.

    The host class is expected to provide a ``get`` method that accepts
    ``scope``, ``extension``, ``suffix`` and arbitrary entity keyword filters.
    """

    def get_tr(self, derivatives=False, **entities):
        """Return the scanning repetition time (TR) for one or more runs.

        Parameters
        ----------
        derivatives : bool
            If True, the search scope is 'all' instead of 'raw', so
            derivatives images are checked as well.
        entities : dict
            Optional keywords used to constrain the selected runs. They are
            forwarded unchanged to ``self.get``.

        Returns
        -------
        float
            The single TR shared by all matched images, rounded to
            5 decimal places.

        Raises
        ------
        NoMatchError
            If no functional image matches the criteria, or if the matched
            images do not share exactly one TR.
        """
        # Only BOLD NIfTI images are considered.
        bold_images = self.get(
            scope='all' if derivatives else 'raw',
            extension=['.nii', '.nii.gz'],
            suffix='bold',
            **entities
        )
        if not bold_images:
            raise NoMatchError("No functional images that match criteria found.")

        # Round before de-duplicating so that float noise does not produce
        # spurious "different" TRs.
        distinct_trs = {
            round(float(image.get_metadata()['RepetitionTime']), 5)
            for image in bold_images
        }
        if len(distinct_trs) > 1:
            raise NoMatchError("Unique TR cannot be found given filters {!r}"
                               .format(entities))
        return distinct_trs.pop()
|
||
class BIDSLayoutV2(BIDSLayoutMRIMixin): | ||
"""A convenience class to provide access to an in-memory representation of a BIDS dataset. | ||
|
||
.. code-block:: | ||
|
||
dataset_path = 'path/to/your/dataset' | ||
layout = BIDSLayoutV2(dataset_path) | ||
|
||
Parameters | ||
---------- | ||
ds_dir: | ||
the (absolute) path to the dataset to load | ||
""" | ||
|
||
def __init__(self, ds_dir: Union[str, Path], validate=True, **kwargs):
    """Load the dataset at ``ds_dir`` and optionally validate it.

    Parameters
    ----------
    ds_dir : str or Path
        Location of the dataset. A ``Path`` is made absolute before
        loading; a ``str`` is handed to ``load_dataset`` unchanged.
    validate : bool
        If True (default), run :meth:`validate` and store the result in
        ``self.validationReport``.
    kwargs
        Accepted but not used by this constructor.

    Raises
    ------
    BIDSValidationError
        If validation was requested and the report contains errors. The
        message is all error messages joined by ``os.linesep``.
    """
    dataset_location = ds_dir.absolute() if isinstance(ds_dir, Path) else ds_dir
    self.dataset = load_dataset(dataset_location)
    self.schema = self.dataset.get_schema()
    self.validationReport = None
    if not validate:
        return

    report = self.validate()
    self.validationReport = report
    if report.has_errors():
        messages = [error['message'] for error in report.get_errors()]
        raise BIDSValidationError(os.linesep.join(messages))
|
||
def __getattr__(self, key): | ||
# replace arbitrary get functions with calls to get | ||
if key.startswith("get_"): | ||
return partial(self.get, "id", key[4:]) | ||
|
||
# give up if the above don't work | ||
raise AttributeError(key) | ||
|
||
def get_metadata(self, path, include_entities=False, scope='all'):
    """Return metadata found in JSON sidecars for the specified file.

    Parameters
    ----------
    path : str
        Path to the file to get metadata for. It is converted to a path
        relative to the dataset before lookup.
    include_entities : bool, optional
        If True, all available entities extracted from the filename
        (rather than JSON sidecars) are included in the returned metadata
        dictionary, keyed by the schema's literal entity name.
    scope : str or list, optional
        The scope of the search space. Each element must
        be one of 'all', 'raw', 'self', 'derivatives', or a
        BIDS-Derivatives pipeline name.
        NOTE(review): this argument is currently not used by the
        implementation; the file is always looked up in ``self.dataset``.

    Returns
    -------
    BIDSMetadata
        A dictionary of key/value pairs extracted from the target file's
        associated JSON sidecars, plus its entities when requested.

    Notes
    -----
    In cases where the same key is found in multiple sidecar files, the
    values in files closer to the input filename should take precedence,
    per the inheritance rules in the BIDS specification. That merging is
    delegated to the file object's own ``get_metadata``.
    When ``include_entities`` is True, entity values override sidecar
    values stored under the same key.
    """
    path = convert_to_relative(self.dataset, path)
    file = self.dataset.get_file(path)

    bmd = BIDSMetadata(file.get_absolute_path())
    # Copy sidecar values into the fresh container instead of mutating the
    # dict handed back by the file object (it may be shared or cached).
    # `or {}` keeps this safe if no metadata is available.
    bmd.update(file.get_metadata() or {})

    if include_entities:
        # Entities are merged even when no sidecar metadata exists, as the
        # parameter documentation promises.
        schema_entities = {e.entity_: e.literal_ for e in self.schema.EntityEnum}
        bmd.update({schema_entities[e.key]: e.value for e in file.entities})
    return bmd
|
||
def get(self, return_type: str = 'object', target: str = None, scope: str = None,
        extension: Union[str, List[str]] = None, suffix: Union[str, List[str]] = None,
        regex_search=False,
        **entities) -> Union[List[str], List[object]]:
    """Query the dataset for files matching the given criteria.

    Depending on the return_type value returns either paths to files that matched the filtering criteria
    or :class:`Artifact <ancpbids.model_v1_7_0.Artifact>` objects for further processing by the caller.

    Note that all provided filter criteria are AND combined, i.e. subj='02',task='lang' will match files containing
    '02' as a subject AND 'lang' as a task. If you provide a list of values for a criterion, they will be OR combined.

    .. code-block::

        file_paths = layout.get(subj='02', task='lang', suffix='bold', return_type='files')

        file_paths = layout.get(subj=['02', '03'], task='lang', return_type='files')

    Parameters
    ----------
    return_type:
        Either 'files' to return paths of matched files
        or 'object' to return :class:`Artifact <ancpbids.model_v1_7_0.Artifact>` object, defaults to 'object'
    target:
        Either `suffixes`, `extensions` or one of any valid BIDS entities key
        (see :class:`EntityEnum <ancpbids.model_v1_7_0.EntityEnum>`), defaults to `None`.
        NOTE(review): the target is validated against the keys returned by
        ``get_entities()`` only; confirm that `suffixes`/`extensions` pass that check.
    scope:
        a hint where to search for files
        If passed, only nodes/directories that match the specified scope will be
        searched. Possible values include:
        'all' (default): search all available directories.
        'derivatives': search all derivatives directories.
        'raw': search only BIDS-Raw directories.
        'self': search only the directly called BIDSLayout.
        <PipelineName>: the name of a BIDS-Derivatives pipeline.
    extension:
        criterion to match any files containing the provided extension only
    suffix:
        criterion to match any files containing the provided suffix only
    regex_search:
        forwarded unchanged to the underlying query, defaults to `False`
    entities
        a list of key-values to match the entities of interest, example: subj='02',task='lang'

    Returns
    -------
    depending on the return_type value either paths to files that matched the filtering criteria
    or Artifact objects for further processing by the caller

    Raises
    ------
    TargetError
        If ``target`` is given but is not among the dataset's entity keys.
    """
    # Collecting the dataset's entities is only needed to validate a target,
    # so skip that work entirely for the common target-less query.
    if target is not None:
        known_entities = self.get_entities()
        if target not in known_entities:
            # Provide some suggestions since the target is invalid.
            potential = list(known_entities.keys())
            suggestions = difflib.get_close_matches(target, potential)
            if suggestions:
                message = "Did you mean one of: {}?".format(suggestions)
            else:
                message = "Valid targets are: {}".format(potential)
            raise TargetError(("Unknown target '{}'. " + message)
                              .format(target))
    return query(self.dataset, return_type, target, scope, extension, suffix, regex_search, **entities)
|
||
@property | ||
def entities(self): | ||
return self.get_entities() | ||
|
||
def get_entities(self, scope: str = None, sort: bool = False) -> dict:
    """Return the unique entities found within the dataset.

    Each key of the resulting dict maps to a list of values (with at least
    one element).

    Example dict:

    .. code-block::

        {
            'sub': ['01', '02', '03'],
            'task': ['gamblestask']
        }

    Parameters
    ----------
    scope:
        see :meth:`get`
    sort: default is `False`
        whether to sort the keys by name

    Returns
    -------
    dict
        a unique set of entities found within the dataset as a dict
    """
    # The work is delegated to ancpbids' entity query.
    found = query_entities(self.dataset, scope, sort)
    return found
|
||
def get_dataset_description(self, scope='self', all_=False) -> Union[List[Dict], Dict]: | ||
"""Return contents of dataset_description.json. | ||
|
||
Parameters | ||
---------- | ||
scope : str | ||
The scope of the search space. Only descriptions of | ||
BIDSLayouts that match the specified scope will be returned. | ||
See :obj:`bids.layout.BIDSLayout.get` docstring for valid values. | ||
Defaults to 'self' --i.e., returns the dataset_description.json | ||
file for only the directly-called BIDSLayout. | ||
all_ : bool | ||
If True, returns a list containing descriptions for | ||
all matching layouts. If False (default), returns for only the | ||
first matching layout. | ||
|
||
Returns | ||
------- | ||
dict or list of dict | ||
a dictionary or list of dictionaries (depending on all_). | ||
""" | ||
all_descriptions = self.dataset.select(self.schema.DatasetDescriptionFile).objects(as_list=True) | ||
if all_: | ||
return all_descriptions | ||
return all_descriptions[0] if all_descriptions else None | ||
|
||
def get_dataset(self) -> object: | ||
""" | ||
Returns | ||
------- | ||
the in-memory representation of this layout/dataset | ||
""" | ||
return self.dataset | ||
|
||
def add_derivatives(self, path):
    """Register a derivative located at ``path`` with the dataset.

    Parameters
    ----------
    path :
        Location of the derivative; it is converted to a path relative to
        the dataset before being passed to ``create_derivative``.
    """
    relative_path = convert_to_relative(self.dataset, path)
    self.dataset.create_derivative(path=relative_path)
|
||
def write_derivative(self, derivative): | ||
"""Writes the provided derivative folder to the dataset. | ||
Note that a 'derivatives' folder will be created if not present. | ||
|
||
Parameters | ||
---------- | ||
derivative: | ||
the derivative folder to write | ||
""" | ||
assert isinstance(derivative, self.schema.DerivativeFolder) | ||
write_derivative(self.dataset, derivative) | ||
|
||
def validate(self) -> ValidationPlugin.ValidationReport:
    """Validate the dataset and return the resulting report.

    Example
    -------

    .. code-block::

        report = layout.validate()
        for message in report.messages:
            print(message)
        if report.has_errors():
            raise RuntimeError("The dataset contains validation errors, cannot continue")

    Returns
    -------
    a report object containing any detected validation errors or warnings
    """
    report = validate_dataset(self.dataset)
    return report
|
||
@property | ||
def files(self): | ||
return self.get_files() | ||
|
||
def get_files(self, scope='all'): | ||
"""Get BIDSFiles for all layouts in the specified scope. | ||
|
||
Parameters | ||
---------- | ||
scope : str | ||
The scope of the search space. Indicates which | ||
BIDSLayouts' entities to extract. | ||
See :obj:`bids.layout.BIDSLayout.get` docstring for valid values. | ||
|
||
|
||
Returns: | ||
A dict, where keys are file paths and values | ||
are :obj:`bids.layout.BIDSFile` instances. | ||
|
||
""" | ||
all_files = self.get(return_type="object", scope=scope) | ||
files = {file.get_absolute_path(): file for file in all_files} | ||
return files | ||
|
||
def get_file(self, filename, scope='all'):
    """Return the file object with the specified path.

    Parameters
    ----------
    filename : str
        The path of the file to retrieve. Must be either an absolute path,
        or relative to the root of this layout.
    scope : str or list, optional
        Scope of the search space. For 'all', 'raw', 'self' or a falsy
        value the lookup starts at the dataset itself; any other value is
        resolved to a sub-node of the dataset first. Default is 'all'.

    Returns
    -------
    The value returned by the selected node's ``get_file`` for the
    relative path (per the original documentation: the file found, or
    None if no match was found).
    """
    relative_name = convert_to_relative(self.dataset, filename)
    search_root = self.dataset
    if scope and scope not in ('all', 'raw', 'self'):
        # Narrow the lookup to the node addressed by the scope.
        search_root, _ = resolve_segments(search_root, scope)
    return search_root.get_file(relative_name)
|
||
@property | ||
def description(self): | ||
return self.get_dataset_description() | ||
|
||
@property | ||
def root(self): | ||
return self.dataset.base_dir_ | ||
|
||
def __repr__(self): | ||
"""Provide a tidy summary of key properties.""" | ||
ents = self.get_entities() | ||
n_subjects = len(set(ents['sub'])) if 'sub' in ents else 0 | ||
n_sessions = len(set(ents['ses'])) if 'ses' in ents else 0 | ||
n_runs = len(set(ents['run'])) if 'run' in ents else 0 | ||
s = ("BIDS Layout: ...{} | Subjects: {} | Sessions: {} | " | ||
"Runs: {}".format(self.dataset.base_dir_, n_subjects, n_sessions, n_runs)) | ||
return s | ||
|
||
|
||
|
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.