From 13c5217ff1648fd610b9bdccf8d3f5652f45066b Mon Sep 17 00:00:00 2001 From: Alejandro de la Vega Date: Thu, 16 Feb 2023 14:22:48 -0600 Subject: [PATCH 1/3] Use pure sql for return_type = 'id' --- bids/layout/layout.py | 90 +++++++++++++++++++++---------------------- 1 file changed, 43 insertions(+), 47 deletions(-) diff --git a/bids/layout/layout.py b/bids/layout/layout.py index 7ae29cbf6..b5bc00051 100644 --- a/bids/layout/layout.py +++ b/bids/layout/layout.py @@ -645,15 +645,24 @@ def get(self, return_type='object', target=None, scope='all', for l in layouts: query = l._build_file_query(filters=filters, regex_search=regex_search) - # NOTE: The following line, when uncommented, eager loads - # associations. This was introduced in order to prevent sessions - # from randomly detaching. It should be fixed by setting - # expire_on_commit at session creation, but let's leave this here - # for another release or two to make sure we don't have any further - # problems. - # query = query.options(joinedload(BIDSFile.tags) - # .joinedload(Tag.entity)) - results.extend(query.all()) + if return_type == 'id': + results.append(query.with_entities('file_path').subquery()) + else: + results.extend(query.all()) + + if return_type == 'id': + if target is None: + raise TargetError('If return_type is "id" or "dir", a valid ' + 'target entity must also be specified.') + + _res = set() + for sq in results: + _res.union( + self.session.query(Tag._value).filter( + Tag.file_path.in_(sq)).filter_by(entity_name=target).distinct().all() + ) + + return natural_sort(list(_res)) # Convert to relative paths if needed if absolute_paths is None: # can be overloaded as option to .get @@ -668,48 +677,35 @@ def get(self, return_type='object', target=None, scope='all', if return_type.startswith('file'): results = natural_sort([f.path for f in results]) - elif return_type in ['id', 'dir']: + elif return_type == 'dir': + if target is None: raise TargetError('If return_type is "id" or "dir", a valid ' 'target entity must also be specified.') - if return_type == 'id': - u_results = set() - for f in results: - if target in f.entities: - val = f.entities[target] - if isinstance(val, Hashable): - u_results.add(val) - results = list(u_results) - - elif return_type == 'dir': - template = entities[target].directory - if template is None: - raise ValueError('Return type set to directory, but no ' - 'directory template is defined for the ' - 'target entity (\"%s\").' % target) - # Construct regex search pattern from target directory template - # On Windows, the regex won't compile if, e.g., there is a folder starting with "U" on the path. - # Converting to a POSIX path with forward slashes solves this. - template = self._root.as_posix() + template - to_rep = re.findall(r'{(.*?)\}', template) - for ent in to_rep: - patt = entities[ent].pattern - template = template.replace('{%s}' % ent, patt) - # Avoid matching subfolders. We are working with POSIX paths here, so we explicitly use "/" - # as path separator. - template += r'[^/]*$' - matches = [ - f.dirname if absolute_paths else str(f._dirname.relative_to(self._root)) # noqa: E501 - for f in results - if re.search(template, f._dirname.as_posix()) - ] - - results = natural_sort(list(set(matches))) - - else: - raise ValueError("Invalid return_type specified (must be one " - "of 'tuple', 'filename', 'id', or 'dir'.") + template = entities[target].directory + if template is None: + raise ValueError('Return type set to directory, but no ' + 'directory template is defined for the ' + 'target entity (\"%s\").' % target) + # Construct regex search pattern from target directory template + # On Windows, the regex won't compile if, e.g., there is a folder starting with "U" on the path. + # Converting to a POSIX path with forward slashes solves this. + template = self._root.as_posix() + template + to_rep = re.findall(r'{(.*?)\}', template) + for ent in to_rep: + patt = entities[ent].pattern + template = template.replace('{%s}' % ent, patt) + # Avoid matching subfolders. We are working with POSIX paths here, so we explicitly use "/" + # as path separator. + template += r'[^/]*$' + matches = [ + f.dirname if absolute_paths else str(f._dirname.relative_to(self._root)) # noqa: E501 + for f in results + if re.search(template, f._dirname.as_posix()) + ] + + results = natural_sort(list(set(matches))) else: results = natural_sort(results, 'path') From 84911d68ca6d07029f5ecce010cb0954201a704a Mon Sep 17 00:00:00 2001 From: Alejandro de la Vega Date: Thu, 16 Feb 2023 14:30:30 -0600 Subject: [PATCH 2/3] Fix tuple --- bids/layout/layout.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/bids/layout/layout.py b/bids/layout/layout.py index b5bc00051..7ac054abd 100644 --- a/bids/layout/layout.py +++ b/bids/layout/layout.py @@ -657,9 +657,11 @@ def get(self, return_type='object', target=None, scope='all', _res = set() for sq in results: - _res.union( - self.session.query(Tag._value).filter( - Tag.file_path.in_(sq)).filter_by(entity_name=target).distinct().all() + q = self.session.query(Tag._value).filter( + Tag.file_path.in_(sq)).filter_by(entity_name=target).distinct() + + _res.update( + [x[0] for x in q.all()] ) return natural_sort(list(_res)) From 474a7390929c55f5e7db008f888ce8539c8ce1df Mon Sep 17 00:00:00 2001 From: Alejandro de la Vega Date: Thu, 16 Feb 2023 14:47:06 -0600 Subject: [PATCH 3/3] Fix: subquery --- bids/layout/layout.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bids/layout/layout.py b/bids/layout/layout.py index 7ac054abd..b3961986d 100644 --- a/bids/layout/layout.py +++ b/bids/layout/layout.py @@ -646,7 +646,7 @@ def get(self, return_type='object', target=None, scope='all', query = l._build_file_query(filters=filters, regex_search=regex_search) if return_type == 'id': - results.append(query.with_entities('file_path').subquery()) + results.append(query.with_entities('path').subquery()) else: results.extend(query.all()) @@ -664,7 +664,7 @@ def get(self, return_type='object', target=None, scope='all', [x[0] for x in q.all()] ) - return natural_sort(list(_res)) + return list(_res) # Convert to relative paths if needed if absolute_paths is None: # can be overloaded as option to .get