From 0059f506196311094a1e7c1c8b40548837d55d10 Mon Sep 17 00:00:00 2001 From: Ashley Stewart Date: Tue, 9 Sep 2025 15:08:10 +1000 Subject: [PATCH 01/10] Schema integration first try --- pyproject.toml | 1 + schema_integration_demo.ipynb | 437 ++++++++++++++++++ src/bids/layout/layout.py | 142 +++++- src/bids/layout/models.py | 368 ++++++++++++++- .../layout/tests/test_config_compatibility.py | 378 +++++++++++++++ src/bids/layout/tests/test_schema_accuracy.py | 197 ++++++++ src/bids/layout/tests/test_schema_config.py | 116 +++++ 7 files changed, 1615 insertions(+), 24 deletions(-) create mode 100644 schema_integration_demo.ipynb create mode 100644 src/bids/layout/tests/test_config_compatibility.py create mode 100644 src/bids/layout/tests/test_schema_accuracy.py create mode 100644 src/bids/layout/tests/test_schema_config.py diff --git a/pyproject.toml b/pyproject.toml index 5b0302c2..18aad135 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,7 @@ dependencies = [ "formulaic >=0.3", "sqlalchemy >=1.4.31", "bids-validator>=1.14.7", # Keep up-to-date to ensure support for recent modalities + "bidsschematools>=0.10", # Required for schema-based configs "num2words >=0.5.10", "click >=8.0", "universal_pathlib >=0.2.2", diff --git a/schema_integration_demo.ipynb b/schema_integration_demo.ipynb new file mode 100644 index 00000000..0109eeef --- /dev/null +++ b/schema_integration_demo.ipynb @@ -0,0 +1,437 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# BIDS Schema Integration Demo\n", + "\n", + "This notebook demonstrates the new BIDS schema integration features in PyBIDS, showing how the new schema-based configuration compares to the legacy JSON-based configuration." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from bids import BIDSLayout\n", + "from bids.tests import get_test_data_path\n", + "import os" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup: Using a test dataset\n", + "\n", + "We'll use the 7T TRT dataset that comes with PyBIDS for testing." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using dataset at: /home/ashley/repos/bids/pybids/tests/data/7t_trt\n", + "total 76\n", + "drwxr-xr-x 12 ashley ashley 4096 Aug 21 08:59 .\n", + "drwxr-xr-x 8 ashley ashley 4096 Aug 21 08:59 ..\n", + "-rw-r--r-- 1 ashley ashley 0 Aug 21 08:59 README\n", + "-rw-r--r-- 1 ashley ashley 55 Aug 21 08:59 dataset_description.json\n", + "-rw-r--r-- 1 ashley ashley 482 Aug 21 08:59 participants.tsv\n", + "drwxr-xr-x 4 ashley ashley 4096 Aug 21 08:59 sub-01\n", + "drwxr-xr-x 4 ashley ashley 4096 Aug 21 08:59 sub-02\n", + "drwxr-xr-x 4 ashley ashley 4096 Aug 21 08:59 sub-03\n", + "drwxr-xr-x 4 ashley ashley 4096 Aug 21 08:59 sub-04\n" + ] + } + ], + "source": [ + "# Get path to test dataset\n", + "data_path = os.path.join(get_test_data_path(), '7t_trt')\n", + "print(f\"Using dataset at: {data_path}\")\n", + "\n", + "# List the first few files to see the structure\n", + "!ls -la {data_path} | head -10" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Part 1: Legacy JSON-based Configuration\n", + "\n", + "The traditional PyBIDS approach uses JSON configuration files to define entities and their patterns." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Legacy layout initialized with 339 files\n", + "\n", + "Available entities: ['CogAtlasID', 'Columns', 'EchoTime', 'EchoTime1', 'EchoTime2', 'EffectiveEchoSpacing', 'IntendedFor', 'MTState', 'PhaseEncodingDirection', 'RepetitionTime', 'SamplingFrequency', 'SliceEncodingDirection', 'SliceTiming', 'StartTime', 'TaskName', 'acquisition', 'ceagent', 'chunk', 'datatype', 'direction', 'echo', 'extension', 'flip', 'fmap', 'inv', 'modality', 'mt', 'nucleus', 'part', 'proc', 'reconstruction', 'recording', 'run', 'sample', 'scans', 'session', 'space', 'staining', 'subject', 'suffix', 'task', 'tracer', 'tracksys', 'volume']\n", + "\n", + "Layout info:\n", + "BIDS Layout: .../bids/pybids/tests/data/7t_trt | Subjects: 10 | Sessions: 20 | Runs: 20\n" + ] + } + ], + "source": [ + "# Initialize layout with legacy config (this is the default)\n", + "layout_legacy = BIDSLayout(data_path, config=['bids'])\n", + "print(f\"Legacy layout initialized with {len(layout_legacy.get())} files\")\n", + "print(f\"\\nAvailable entities: {sorted(layout_legacy.get_entities().keys())}\")\n", + "\n", + "# Print basic layout information like in the docs tutorial\n", + "print(f\"\\nLayout info:\")\n", + "print(layout_legacy)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found 6 BOLD files for subject 01:\n", + " sub-01_ses-1_task-rest_acq-fullbrain_run-1_bold.nii.gz\n", + " sub-01_ses-1_task-rest_acq-fullbrain_run-2_bold.nii.gz\n", + " sub-01_ses-1_task-rest_acq-prefrontal_bold.nii.gz\n", + " sub-01_ses-2_task-rest_acq-fullbrain_run-1_bold.nii.gz\n", + " sub-01_ses-2_task-rest_acq-fullbrain_run-2_bold.nii.gz\n", + " sub-01_ses-2_task-rest_acq-prefrontal_bold.nii.gz\n", + "\n", + "Compare return types:\n", + "return_types=[, ]\n", + "return_types=[, ]\n" + ] + } + ], + "source": [ + "# Query example: Get all BOLD files for subject 01\n", + "bold_files_legacy = layout_legacy.get(\n", + " subject='01', \n", + " suffix='bold', \n", + " extension='nii.gz',\n", + " return_type='filename' # Returns filenames instead of BIDSFile objects\n", + ")\n", + "print(f\"Found {len(bold_files_legacy)} BOLD files for subject 01:\")\n", + "for f in bold_files_legacy:\n", + " print(f\" {os.path.basename(f)}\")\n", + "\n", + "# Show the difference between return types like in docs\n", + "print(f\"\\nCompare return types:\")\n", + "bold_objects = layout_legacy.get(subject='01', suffix='bold', extension='nii.gz')[:2]\n", + "bold_filenames = layout_legacy.get(subject='01', suffix='bold', extension='nii.gz', return_type='filename')[:2]\n", + "\n", + "print(f\"return_types={[type(obj) for obj in bold_objects]}\")\n", + "print(f\"return_types={[type(fname) for fname in bold_filenames]}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Part 2: New Schema-based Configuration\n", + "\n", + "The new approach uses the BIDS schema directly via `bidsschematools` to understand the dataset structure." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Schema-based layout initialized with 339 files\n", + "\n", + "Available entities: ['CogAtlasID', 'Columns', 'EchoTime', 'EchoTime1', 'EchoTime2', 'EffectiveEchoSpacing', 'IntendedFor', 'MTState', 'PhaseEncodingDirection', 'RepetitionTime', 'SamplingFrequency', 'SliceEncodingDirection', 'SliceTiming', 'StartTime', 'TaskName', 'acquisition', 'ceagent', 'chunk', 'datatype', 'density', 'description', 'direction', 'echo', 'extension', 'flip', 'hemisphere', 'inversion', 'label', 'modality', 'mtransfer', 'nucleus', 'part', 'processing', 'reconstruction', 'recording', 'resolution', 'run', 'sample', 'segmentation', 'session', 'space', 'split', 'stain', 'subject', 'suffix', 'task', 'tracer', 'tracksys', 'volume']\n", + "\n", + "Layout info:\n", + "BIDS Layout: .../bids/pybids/tests/data/7t_trt | Subjects: 10 | Sessions: 20 | Runs: 20\n" + ] + } + ], + "source": [ + "# Initialize layout with schema-based config\n", + "# This uses Config.load('bids-schema') internally when you pass 'bids-schema' in config\n", + "layout_schema = BIDSLayout(data_path, config=['bids-schema'])\n", + "print(f\"Schema-based layout initialized with {len(layout_schema.get())} files\")\n", + "print(f\"\\nAvailable entities: {sorted(layout_schema.get_entities().keys())}\")\n", + "\n", + "# Print basic layout information\n", + "print(f\"\\nLayout info:\")\n", + "print(layout_schema)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found 6 BOLD files for subject 01:\n", + " sub-01_ses-1_task-rest_acq-fullbrain_run-1_bold.nii.gz\n", + " sub-01_ses-1_task-rest_acq-fullbrain_run-2_bold.nii.gz\n", + " sub-01_ses-1_task-rest_acq-prefrontal_bold.nii.gz\n", + " sub-01_ses-2_task-rest_acq-fullbrain_run-1_bold.nii.gz\n", + " sub-01_ses-2_task-rest_acq-fullbrain_run-2_bold.nii.gz\n", + " sub-01_ses-2_task-rest_acq-prefrontal_bold.nii.gz\n", + "\n", + "Compare return types:\n", + "return_types=[, ]\n", + "return_types=[, ]\n" + ] + } + ], + "source": [ + "# Same query with schema-based layout\n", + "bold_files_schema = layout_schema.get(\n", + " subject='01', \n", + " suffix='bold', \n", + " extension='nii.gz',\n", + " return_type='filename'\n", + ")\n", + "print(f\"Found {len(bold_files_schema)} BOLD files for subject 01:\")\n", + "for f in bold_files_schema:\n", + " print(f\" {os.path.basename(f)}\")\n", + "\n", + "# Show the difference between return types like in docs\n", + "print(f\"\\nCompare return types:\")\n", + "bold_objects_schema = layout_schema.get(subject='01', suffix='bold', extension='nii.gz')[:2]\n", + "bold_filenames_schema = layout_schema.get(subject='01', suffix='bold', extension='nii.gz', return_type='filename')[:2]\n", + "\n", + "print(f\"return_types={[type(obj) for obj in bold_objects_schema]}\")\n", + "print(f\"return_types={[type(fname) for fname in bold_filenames_schema]}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Part 3: Comparing the Two Approaches\n", + "\n", + "Let's verify that both approaches give us the same results." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Legacy layout: 339 files\n", + "Schema layout: 339 files\n", + "\n", + "Match: True\n" + ] + } + ], + "source": [ + "# Compare file counts\n", + "legacy_count = len(layout_legacy.get())\n", + "schema_count = len(layout_schema.get())\n", + "\n", + "print(f\"Legacy layout: {legacy_count} files\")\n", + "print(f\"Schema layout: {schema_count} files\")\n", + "print(f\"\\nMatch: {legacy_count == schema_count}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Legacy entities: 44\n", + "Schema entities: 49\n", + "\n", + "Entities only in legacy: ['fmap', 'inv', 'mt', 'proc', 'scans', 'staining']\n", + "\n", + "Entities only in schema: ['density', 'description', 'hemisphere', 'inversion', 'label', 'mtransfer', 'processing', 'resolution', 'segmentation', 'split', 'stain']\n" + ] + } + ], + "source": [ + "# Compare entities\n", + "legacy_entities = set(layout_legacy.get_entities().keys())\n", + "schema_entities = set(layout_schema.get_entities().keys())\n", + "\n", + "print(f\"Legacy entities: {len(legacy_entities)}\")\n", + "print(f\"Schema entities: {len(schema_entities)}\")\n", + "\n", + "# Check for differences\n", + "only_legacy = legacy_entities - schema_entities\n", + "only_schema = schema_entities - legacy_entities\n", + "\n", + "if only_legacy:\n", + " print(f\"\\nEntities only in legacy: {sorted(only_legacy)}\")\n", + "if only_schema:\n", + " print(f\"\\nEntities only in schema: {sorted(only_schema)}\")\n", + "if not only_legacy and not only_schema:\n", + " print(\"\\nAll entities match!\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Query: {'suffix': 'bold', 'extension': 'nii.gz'}\n", + "Legacy: 60 files\n", + "Schema: 60 files\n", + "✓ Results match!\n", + "\n", + "Query: {'suffix': 'T1w', 'extension': 'nii.gz'}\n", + "Legacy: 10 files\n", + "Schema: 10 files\n", + "✓ Results match!\n", + "\n", + "Query: {'subject': '01', 'suffix': 'scans', 'extension': 'tsv'}\n", + "Legacy: 2 files\n", + "Schema: 2 files\n", + "✓ Results match!\n", + "\n" + ] + } + ], + "source": [ + "# Compare specific query results\n", + "def compare_queries(layout1, layout2, name1=\"Layout 1\", name2=\"Layout 2\", **kwargs):\n", + " \"\"\"Compare results from two layouts for the same query.\"\"\"\n", + " result1 = set(layout1.get(return_type='filename', **kwargs))\n", + " result2 = set(layout2.get(return_type='filename', **kwargs))\n", + " \n", + " print(f\"Query: {kwargs}\")\n", + " print(f\"{name1}: {len(result1)} files\")\n", + " print(f\"{name2}: {len(result2)} files\")\n", + " \n", + " if result1 == result2:\n", + " print(\"✓ Results match!\\n\")\n", + " else:\n", + " only1 = result1 - result2\n", + " only2 = result2 - result1\n", + " if only1:\n", + " print(f\"Only in {name1}: {[os.path.basename(f) for f in only1]}\")\n", + " if only2:\n", + " print(f\"Only in {name2}: {[os.path.basename(f) for f in only2]}\")\n", + " print()\n", + "\n", + "# Test various queries\n", + "compare_queries(layout_legacy, layout_schema, \"Legacy\", \"Schema\", \n", + " suffix='bold', extension='nii.gz')\n", + "\n", + "compare_queries(layout_legacy, layout_schema, \"Legacy\", \"Schema\",\n", + " suffix='T1w', extension='nii.gz')\n", + "\n", + "# 1. Test with scans TSV files (these exist)\n", + "compare_queries(layout_legacy, layout_schema, \"Legacy\", \"Schema\",\n", + "subject='01', suffix='scans', extension='tsv')\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Part 5: Performance Comparison\n", + "\n", + "Let's compare the initialization time for both approaches." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Legacy initialization: 0.684 seconds\n", + "Schema initialization: 0.679 seconds\n", + "\n", + "Difference: 0.005 seconds\n", + "Schema is faster by 0.8%\n" + ] + } + ], + "source": [ + "import time\n", + "\n", + "# Time legacy initialization\n", + "start = time.time()\n", + "layout_legacy_timed = BIDSLayout(data_path, config=['bids'], reset_database=True)\n", + "legacy_time = time.time() - start\n", + "\n", + "# Time schema initialization \n", + "start = time.time()\n", + "layout_schema_timed = BIDSLayout(data_path, config=['bids-schema'], reset_database=True)\n", + "schema_time = time.time() - start\n", + "\n", + "print(f\"Legacy initialization: {legacy_time:.3f} seconds\")\n", + "print(f\"Schema initialization: {schema_time:.3f} seconds\")\n", + "print(f\"\\nDifference: {abs(legacy_time - schema_time):.3f} seconds\")\n", + "print(f\"Schema is {'faster' if schema_time < legacy_time else 'slower'} by {abs(1 - schema_time/legacy_time)*100:.1f}%\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.18" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/src/bids/layout/layout.py b/src/bids/layout/layout.py index 998d26a3..afa2c399 100644 --- a/src/bids/layout/layout.py +++ b/src/bids/layout/layout.py @@ -414,6 +414,93 @@ def get_entities(self, scope='all', metadata=None): entities.update({e.name: e for e in results}) return entities + def _find_directories_for_entity_using_schema(self, entity_name, results): + """Find directories using schema directory rules.""" + from bidsschematools import schema as bst_schema + import os + + # Load schema directory rules + bids_schema = bst_schema.load_schema() + if not hasattr(bids_schema.rules, 'directories') or not hasattr(bids_schema.rules.directories, 'raw'): + # Fallback to simple approach if no schema rules + return [f.dirname for f in results if entity_name in f.entities] + + directory_rules = bids_schema.rules.directories.raw + directories = set() + + # Build directory paths using schema rules for each file + for f in results: + if entity_name in f.entities: + # Build path based on schema directory hierarchy + path_components = self._build_path_from_schema_rules(f.entities, directory_rules) + if path_components: + # Convert to full directory path + directory_path = os.path.join(self.root, *path_components) + directories.add(directory_path) + else: + # Fallback to actual path if schema reconstruction fails + directories.add(f.dirname) + + return list(directories) + + def _build_path_from_schema_rules(self, entities, directory_rules): + """Build directory path components from schema rules and entity values.""" + path_components = [] + current_level = 'root' + + # Traverse schema directory hierarchy + while current_level: + rule = directory_rules.get(current_level) + if not rule: + break + + rule_dict = dict(rule) + rule_entity = rule_dict.get('entity') + + # If this rule corresponds to an entity, add directory component + if rule_entity and rule_entity in entities: + entity_value = entities[rule_entity] + if rule_entity == 'subject': + path_components.append(f'sub-{entity_value}') + elif rule_entity == 'session': + path_components.append(f'ses-{entity_value}') + else: + path_components.append(str(entity_value)) + elif current_level == 'datatype' and 'datatype' in entities: + # Special case: datatype rule has no entity field but we use the datatype value + path_components.append(entities['datatype']) + + # Determine next level based on subdirs and available entities + subdirs = rule_dict.get('subdirs', []) + next_level = None + + # First pass: look for entity-based subdirs across all options + entity_candidates = [] + rule_candidates = [] + + for subdir in subdirs: + if isinstance(subdir, dict) and 'oneOf' in subdir: + for option in subdir['oneOf']: + if option in entities: + entity_candidates.append(option) + elif option in directory_rules: + rule_candidates.append(option) + elif isinstance(subdir, str): + if subdir in entities: + entity_candidates.append(subdir) + elif subdir in directory_rules: + rule_candidates.append(subdir) + + # Prefer entity candidates, fallback to rule candidates + if entity_candidates: + next_level = entity_candidates[0] # Take first entity match + elif rule_candidates: + next_level = rule_candidates[0] # Take first rule match + + current_level = next_level + + return path_components + def get_files(self, scope='all'): """Get BIDSFiles for all layouts in the specified scope. @@ -678,7 +765,7 @@ def get(self, return_type='object', target=None, scope='all', '`layout.get(**filters)`.') # Ensure leading periods if extensions were passed - if 'extension' in filters and 'bids' in self.config: + if 'extension' in filters and ('bids' in self.config or any('bids-schema' in c for c in self.config)): filters['extension'] = ['.' + x.lstrip('.') if isinstance(x, str) else x for x in listify(filters['extension'])] @@ -746,28 +833,37 @@ def get(self, return_type='object', target=None, scope='all', )) results = natural_sort(list(set(results))) elif return_type == 'dir': - template = entities[target].directory - if template is None: - raise ValueError('Return type set to directory, but no ' - 'directory template is defined for the ' - 'target entity (\"%s\").' % target) - # Construct regex search pattern from target directory template - # On Windows, the regex won't compile if, e.g., there is a folder starting with "U" on the path. - # Converting to a POSIX path with forward slashes solves this. - template = self._root.as_posix() + template - to_rep = re.findall(r'{(.*?)\}', template) - for ent in to_rep: - patt = entities[ent].pattern - template = template.replace('{%s}' % ent, patt) - # Avoid matching subfolders. We are working with POSIX paths here, so we explicitly use "/" - # as path separator. - template += r'[^/]*$' - matches = [ - f.dirname - for f in results - if re.search(template, f._dirname.as_posix()) - ] - results = natural_sort(list(set(matches))) + # Check if we're using schema-based config or legacy config + using_schema = any('bids-schema' in str(c) for c in listify(self.config)) + + if using_schema: + # Use schema directory rules for schema-based configs + directories = self._find_directories_for_entity_using_schema(target, results) + results = natural_sort(list(set(directories))) + else: + # Use original template-based approach for legacy configs + template = entities[target].directory + if template is None: + raise ValueError('Return type set to directory, but no ' + 'directory template is defined for the ' + 'target entity (\"%s\").' % target) + # Construct regex search pattern from target directory template + # On Windows, the regex won't compile if, e.g., there is a folder starting with "U" on the path. + # Converting to a POSIX path with forward slashes solves this. + template = self._root.as_posix() + template + to_rep = re.findall(r'{(.*?)\}', template) + for ent in to_rep: + patt = entities[ent].pattern + template = template.replace('{%s}' % ent, patt) + # Avoid matching subfolders. We are working with POSIX paths here, so we explicitly use "/" + # as path separator. + template += r'[^/]*$' + matches = [ + f.dirname + for f in results + if re.search(template, f._dirname.as_posix()) + ] + results = natural_sort(list(set(matches))) else: results = natural_sort(results, 'path') diff --git a/src/bids/layout/models.py b/src/bids/layout/models.py index 104e1f42..48f5a285 100644 --- a/src/bids/layout/models.py +++ b/src/bids/layout/models.py @@ -16,6 +16,10 @@ from sqlalchemy import Column, String, Boolean, ForeignKey, Table from sqlalchemy.orm import reconstructor, relationship, backref, object_session +from bidsschematools import schema as bst_schema +from bidsschematools import rules +import re + try: from sqlalchemy.orm import declarative_base except ImportError: # sqlalchemy < 1.4 @@ -148,7 +152,7 @@ def _init_on_load(self): self.default_path_patterns = json.loads(self._default_path_patterns) @classmethod - def load(self, config, session=None): + def load(cls, config, session=None): """Load a Config instance from the passed configuration data. Parameters @@ -160,6 +164,8 @@ def load(self, config, session=None): (e.g., 'bids' or 'derivatives') * A path to a JSON file containing config information * A dictionary containing config information + * 'bids-schema' to load from the official BIDS schema + * dict with 'schema_version' key to load specific schema version session : :obj:`sqlalchemy.orm.session.Session` or None An optional SQLAlchemy Session instance. If passed, the session is used to check the database for (and @@ -170,6 +176,16 @@ def load(self, config, session=None): A Config instance. """ + # Handle schema-based config loading + if config == 'bids-schema': + return cls._from_schema(session=session) + elif isinstance(config, dict) and 'schema_version' in config: + return cls._from_schema( + schema_version=config['schema_version'], + session=session + ) + + # Existing JSON/file-based loading if isinstance(config, (str, Path, UPath)): config_paths = get_option('config_paths') if config in config_paths: @@ -188,6 +204,356 @@ def load(self, config, session=None): return Config(session=session, **config) + @classmethod + def _from_schema(cls, schema_version=None, session=None): + """Load config from BIDS schema - store patterns directly.""" + from bidsschematools import rules, schema as bidsschema + + bids_schema = bidsschema.load_schema() + + # Collect ALL patterns from bidsschematools (keep existing collection logic) + all_patterns = [] + file_sections = { + 'raw': bids_schema.rules.files.raw, + 'deriv': bids_schema.rules.files.deriv, + 'common': bids_schema.rules.files.common + } + + for section_type, sections in file_sections.items(): + if section_type == 'raw': + for datatype_name in sections.keys(): + datatype_rules = getattr(sections, datatype_name) + regex_rules = rules.regexify_filename_rules(datatype_rules, bids_schema, level=1) + all_patterns.extend(regex_rules) + else: + for subsection_name in sections.keys(): + subsection_rules = getattr(sections, subsection_name) + regex_rules = rules.regexify_filename_rules(subsection_rules, bids_schema, level=1) + all_patterns.extend(regex_rules) + + # Store patterns directly - NO PARSING EVER + config_name = f'bids-schema-{bids_schema.bids_version}' + + # Extract entities directly from schema rules - no regex parsing! + entity_names = cls._extract_entity_names_from_rules(bids_schema) + + # Extract actual values for key entities from schema rules directly + entity_values = cls._extract_entity_values_from_rules(bids_schema, entity_names) + + # Create Entity objects + entities = cls._create_entities_from_schema(entity_names, entity_values, bids_schema) + + config = cls(name=config_name, entities=entities, session=session) + + return config + + @classmethod + def _extract_entity_names_from_rules(cls, bids_schema): + """Extract entity names directly from schema rules - no regex parsing!""" + + # Get entity names from all rule sections + entity_names = set() + file_sections = { + 'raw': bids_schema.rules.files.raw, + 'deriv': bids_schema.rules.files.deriv, + 'common': bids_schema.rules.files.common + } + + for section_type, sections in file_sections.items(): + for section_name in sections.keys(): + section_rules = getattr(sections, section_name) + for rule_name in section_rules.keys(): + rule = getattr(section_rules, rule_name) + rule_dict = dict(rule) + if 'entities' in rule_dict: + entity_names.update(rule_dict['entities'].keys()) + + # Add special constructs that appear in patterns but aren't "entities" + entity_names.update(['suffix', 'extension', 'datatype']) + + return entity_names + + @classmethod + def _create_entities_from_schema(cls, entity_names, entity_values, bids_schema): + """Create Entity objects from schema information.""" + entities = [] + + for entity_name in sorted(entity_names): + # Get entity info from schema if available + entity_spec = bids_schema.objects.entities.get(entity_name, {}) + + # Handle special entities that don't have BIDS prefixes + special_entities = {'suffix', 'extension', 'datatype'} + + if entity_name in special_entities: + # These entities don't have prefixes - they appear directly + if entity_name in entity_values and entity_values[entity_name]: + # Filter out problematic values for extension entity + if entity_name == 'extension': + # Remove empty string and wildcard patterns that would match everything + filtered_values = {v for v in entity_values[entity_name] + if v and v not in ('.*', '**', '*')} + values = '|'.join(sorted(filtered_values)) + else: + values = '|'.join(sorted(entity_values[entity_name])) + + if entity_name == 'extension': + pattern = fr'({values})\Z' # Extensions are at the end + elif entity_name == 'datatype': + pattern = fr'/({values})/' # Datatypes are directory names + else: # suffix + pattern = fr'[_/\\\\]({values})\.?' # Before extension, with optional dot + else: + # Regular entities - use schema format patterns directly! + bids_prefix = entity_spec.get('name', entity_name) + format_type = entity_spec.get('format', 'label') + + # Get the official value pattern from schema formats + if format_type in bids_schema.objects.formats: + format_obj = dict(bids_schema.objects.formats[format_type]) + value_pattern = format_obj['pattern'] # [0-9a-zA-Z]+, [0-9]+, etc. + + # Combine prefix + schema format pattern + if entity_name in ['subject', 'session']: + pattern = fr'[/\\\\]+{bids_prefix}-({value_pattern})' + else: + pattern = fr'[_/\\\\]+{bids_prefix}-({value_pattern})' + + entity_config = { + 'name': entity_name, + 'pattern': pattern, + 'dtype': 'int' if entity_spec.get('format') == 'index' else 'str' + } + + entities.append(entity_config) + + return entities + + @classmethod + def _extract_entity_values_from_rules(cls, bids_schema, entity_names): + """Extract entity values directly from schema rules - no regex parsing!""" + + entity_values = {name: set() for name in entity_names} + file_sections = { + 'raw': bids_schema.rules.files.raw, + 'deriv': bids_schema.rules.files.deriv, + 'common': bids_schema.rules.files.common + } + + for section_type, sections in file_sections.items(): + for section_name in sections.keys(): + section_rules = getattr(sections, section_name) + for rule_name in section_rules.keys(): + rule = getattr(section_rules, rule_name) + rule_dict = dict(rule) + + # Get values for special entities directly from rules + if 'suffixes' in rule_dict: + entity_values['suffix'].update(rule_dict['suffixes']) + if 'extensions' in rule_dict: + entity_values['extension'].update(rule_dict['extensions']) + if 'datatypes' in rule_dict: + entity_values['datatype'].update(rule_dict['datatypes']) + + # For regular entities, we don't extract specific values + # They use generic patterns like [0-9a-zA-Z]+ + + return entity_values + + + + @staticmethod + def _extract_separator_from_context(entity_name, before_context, after_context, full_pattern, schema): + """Extract separator pattern from schema context. + + Analyzes the actual schema pattern context to determine what + separators are used for this entity by examining the schema regex + and entity classification. + """ + # Find the entity in the full pattern and extract its actual context + entity_match = re.search(rf'(\S*?)(\(\?P<{entity_name}>[^)]+\))', full_pattern) + if not entity_match: + return f"[_/\\\\]+{entity_name}-" # Fallback + + prefix_context = entity_match.group(1) + + # Determine entity type from schema - is it a real entity or special construct? + is_real_entity = entity_name in schema.objects.entities + + if not is_real_entity: + # This is a schema construct (extension/suffix/datatype capturing group) + # Analyze the pattern context to determine positioning rules + + # Analyze from the actual pattern context + if '/' in before_context and '/' in after_context: + # Appears between directory separators (like datatype) + return "[/\\\\]+" + elif before_context.endswith(')'): + # Comes directly after another capturing group (like extension after suffix) + return "" + elif '_)?' in before_context or 'chunk)_)?' in before_context: + # Comes after underscore separators from entities (like suffix) + return "[_/\\\\]+" + else: + # Analyze position in full pattern to determine context + entity_pos = full_pattern.find(f'(?P<{entity_name}>') + pattern_before = full_pattern[:entity_pos] + + # Count directory structure elements before this construct + slash_count = pattern_before.count('/') + + if slash_count <= 2: + # In directory part - use slash separators + return "[/\\\\]+" + else: + # In filename part - analyze further + if ')(?P<' in pattern_before[-20:]: + # Directly after another construct + return "" + else: + # After separator + return "[_/\\\\]+" + + # Extract the actual prefix used in the schema pattern + # Look for patterns like 'sub-(?P' or '_task-(?P' + prefix_pattern = re.search(rf'([/_]?)([a-zA-Z]+)-\(\?P<{entity_name}>', full_pattern) + if prefix_pattern: + separator_char = prefix_pattern.group(1) + prefix = prefix_pattern.group(2) + + if separator_char == '/': + return f"[/\\\\]+{prefix}-" + elif separator_char == '_': + return f"[_/\\\\]+{prefix}-" + else: # Direct prefix (no separator before) + # Check position to determine separator type + entity_pos = full_pattern.find(f'(?P<{entity_name}>') + pattern_before = full_pattern[:entity_pos] + slash_count = pattern_before.count('/') + + if slash_count <= 2: # Directory part + return f"[/\\\\]+{prefix}-" + else: # Filename part + return f"[_/\\\\]+{prefix}-" + + # Try to extract any entity prefix from before_context + prefix_match = re.search(r'([a-zA-Z]+)-$', before_context) + if prefix_match: + prefix = prefix_match.group(1) + # Verify this is a real entity prefix from schema + for entity in schema.objects.entities.values(): + if entity.get('name') == prefix: + if '/' in before_context: + return f"[/\\\\]+{prefix}-" + else: + return f"[_/\\\\]+{prefix}-" + + # For entities with prefixes, extract from schema pattern + broad_match = re.search(rf'([a-zA-Z]+)-\(\?P<{entity_name}>', full_pattern) + if broad_match: + prefix = broad_match.group(1) + + # Determine separator type from position in pattern + entity_pos = full_pattern.find(f'(?P<{entity_name}>') + pattern_before_entity = full_pattern[:entity_pos] + slash_count = pattern_before_entity.count('/') + + if slash_count <= 2: # Directory part + return f"[/\\\\]+{prefix}-" + else: # Filename part + return f"[_/\\\\]+{prefix}-" + + # If no prefix pattern found, this entity shouldn't have a prefix + # This handles cases where entities appear directly without prefix- + return f"[_/\\\\]+{entity_name}-" # Final fallback + + @staticmethod + def _extract_directory_config(entity_name, pattern, schema): + """Extract directory configuration from schema directory rules.""" + + # Get directory rules from schema + if not hasattr(schema.rules, 'directories') or not hasattr(schema.rules.directories, 'raw'): + return None + + dir_rules = schema.rules.directories.raw + + # Check if this entity has a directory rule + for rule_name, rule in dir_rules.items(): + rule_dict = dict(rule) + if rule_dict.get('entity') == entity_name: + # This entity creates directories + + # Build directory template based on schema hierarchy + # Find parent entities by checking what directories can contain this entity + parent_entities = [] + + # Check all directory rules to see which ones list this entity in subdirs + for parent_rule_name, parent_rule in dir_rules.items(): + parent_rule_dict = dict(parent_rule) + subdirs = parent_rule_dict.get('subdirs', []) + + # Check if this entity is in parent's subdirs + for subdir in subdirs: + if isinstance(subdir, dict) and 'oneOf' in subdir: + if entity_name in subdir['oneOf']: + if parent_rule_dict.get('entity'): + parent_entities.append(parent_rule_dict['entity']) + elif subdir == entity_name: + if parent_rule_dict.get('entity'): + parent_entities.append(parent_rule_dict['entity']) + + # Build template with parent hierarchy + template_parts = [] + template_parts.extend([f'{{{parent}}}' for parent in parent_entities]) + template_parts.append(f'{{{entity_name}}}') + + return ''.join(template_parts) + + return None + + @staticmethod + def _entity_appears_in_all_modalities(entity_name, schema): + """Check if an entity appears in ALL modalities in the schema. + + Entities that appear in all modalities (like suffix, extension) should have + their values combined across all modalities to ensure complete coverage. + + Parameters + ---------- + entity_name : str + The entity name to check + schema : object + The BIDS schema object + + Returns + ------- + bool + True if entity appears in all modalities, False otherwise + """ + # Get all modalities + all_modalities = list(schema.rules.files.raw.keys()) + if not all_modalities: + return False + + modalities_with_entity = set() + + # Check each modality for this entity + for modality in all_modalities: + datatype_rules = getattr(schema.rules.files.raw, modality) + patterns = rules.regexify_filename_rules(datatype_rules, schema, level=1) + + # Check if entity appears in any pattern for this modality + for pattern in patterns: + entities_in_pattern = re.findall(r'\(\?P<([^>]+)>', pattern['regex']) + if entity_name in entities_in_pattern: + modalities_with_entity.add(modality) + break # Found in this modality, move to next + + # Return True if entity appears in ALL modalities + return len(modalities_with_entity) == len(all_modalities) + + + def __repr__(self): return f"" diff --git a/src/bids/layout/tests/test_config_compatibility.py b/src/bids/layout/tests/test_config_compatibility.py new file mode 100644 index 00000000..b9904f04 --- /dev/null +++ b/src/bids/layout/tests/test_config_compatibility.py @@ -0,0 +1,378 @@ +"""Tests to verify schema-driven config matches old JSON config behavior.""" + +import pytest +import re +from bids.layout.models import Config + + +class TestConfigCompatibility: + """Test that new schema-driven config matches old JSON config behavior.""" + + def test_core_entities_present_in_both_configs(self): + """Test that essential BIDS entities are present in both configs.""" + old_config = Config.load('bids') + new_config = Config.load('bids-schema') + + # Test that both configs support the fundamental BIDS entities + core_entities = ['subject', 'session', 'task', 'run', 'acquisition'] + + # Check core entities exist (allowing for naming differences) + old_has_core = all(entity in old_config.entities for entity in core_entities) + new_has_core = all(entity in new_config.entities for entity in core_entities) + + assert old_has_core and new_has_core, "Both configs should support core BIDS entities" + + def test_schema_entities_are_superset_of_valid_old_entities(self): + """Test that schema includes all valid entities from old config.""" + old_config = Config.load('bids') + new_config = Config.load('bids-schema') + + # Map old config entities to their schema equivalents + entity_mapping = { + 'inv': 'inversion', + 'mt': 'mtransfer', + 'proc': 'processing', + # 'fmap', 'scans', 'staining' are invalid - exclude them + } + + valid_old_entities = set(old_config.entities.keys()) - {'fmap', 'scans', 'staining'} + missing_from_new = [] + + for old_entity in valid_old_entities: + expected_new_entity = entity_mapping.get(old_entity, old_entity) + if expected_new_entity not in new_config.entities: + missing_from_new.append(f"{old_entity} -> {expected_new_entity}") + + assert len(missing_from_new) == 0, f"Schema missing valid entities: {missing_from_new}" + + def test_suffix_coverage_matches(self): + """Test that suffix patterns cover the same suffixes.""" + #pytest.skip("Old config uses generic catch-all pattern while new config uses explicit schema suffixes. " + # "Cannot meaningfully compare pattern extraction. See test_new_suffixes_compatible_with_old_pattern instead.") + + # Original implementation preserved for reference + old_config = Config.load('bids') + new_config = Config.load('bids-schema') + + # Extract suffixes from patterns + old_suffixes = self._extract_suffixes_from_pattern(old_config.entities['suffix'].pattern) + new_suffixes = self._extract_suffixes_from_pattern(new_config.entities['suffix'].pattern) + + missing = old_suffixes - new_suffixes + extra = new_suffixes - old_suffixes + + error_msg = [] + if missing: + error_msg.append(f"Missing suffixes: {sorted(missing)}") + if extra: + error_msg.append(f"Extra suffixes: {sorted(extra)}") + + # Core BIDS suffixes should be preserved + core_suffixes = {'bold', 'T1w', 'T2w', 'dwi', 'events'} + assert core_suffixes.issubset(new_suffixes), f"Missing core suffixes; {'; '.join(error_msg)}" + + # Report coverage for analysis + coverage = len(old_suffixes & new_suffixes) / len(old_suffixes) if old_suffixes else 0 + if coverage < 1.0 or missing or extra: + pytest.fail(f"Suffix coverage: {coverage:.1%}; {'; '.join(error_msg)}") + + def test_new_suffixes_compatible_with_old_pattern(self): + """Test that all new schema suffixes would be accepted by old generic pattern.""" + old_config = Config.load('bids') + new_config = Config.load('bids-schema') + + # Get old pattern (generic) and new suffixes (explicit) + old_pattern = old_config.entities['suffix'].regex + new_suffixes = self._extract_suffixes_from_pattern(new_config.entities['suffix'].pattern) + + # Test that old pattern would accept all new suffixes + incompatible_suffixes = [] + for suffix in new_suffixes: + test_filename = f"sub-01_task-test_{suffix}.nii.gz" + if not old_pattern.search(test_filename): + incompatible_suffixes.append(suffix) + + assert len(incompatible_suffixes) == 0, f"Old pattern would reject these valid schema suffixes: {incompatible_suffixes}" + + def test_extension_coverage_matches(self): + """Test that extension patterns cover the same extensions.""" + #pytest.skip("Old config uses generic catch-all pattern while new config uses explicit schema extensions. " + # "Cannot meaningfully compare pattern extraction. See test_new_extensions_compatible_with_old_pattern instead.") + + # Original implementation preserved for reference + old_config = Config.load('bids') + new_config = Config.load('bids-schema') + + # Extract extensions from patterns + old_extensions = self._extract_extensions_from_pattern(old_config.entities['extension'].pattern) + new_extensions = self._extract_extensions_from_pattern(new_config.entities['extension'].pattern) + + missing = old_extensions - new_extensions + extra = new_extensions - old_extensions + + error_msg = [] + if missing: + error_msg.append(f"Missing extensions: {sorted(missing)}") + if extra: + error_msg.append(f"Extra extensions: {sorted(extra)}") + + # Core extensions should be preserved + core_extensions = {'.nii.gz', '.nii', '.tsv', '.json'} + assert core_extensions.issubset(new_extensions), f"Missing core extensions; {'; '.join(error_msg)}" + + # Report coverage for analysis + coverage = len(old_extensions & new_extensions) / len(old_extensions) if old_extensions else 0 + if coverage < 1.0 or missing or extra: + pytest.fail(f"Extension coverage: {coverage:.1%}; {'; '.join(error_msg)}") + + def test_new_extensions_compatible_with_old_pattern(self): + """Test that all new schema extensions would be accepted by old generic pattern.""" + old_config = Config.load('bids') + new_config = Config.load('bids-schema') + + # Get old pattern (generic) and new extensions (explicit) + old_pattern = old_config.entities['extension'].regex + new_extensions = self._extract_extensions_from_pattern(new_config.entities['extension'].pattern) + + # Test that old pattern would accept all new extensions + incompatible_extensions = [] + directory_extensions = [] + + for extension in new_extensions: + test_filename = f"sub-01_task-test_bold{extension}" + if not old_pattern.search(test_filename): + if extension.endswith('/'): + # Directory-style extensions are a new BIDS feature not supported by old pattern + directory_extensions.append(extension) + else: + incompatible_extensions.append(extension) + + # Core file extensions should be compatible + assert len(incompatible_extensions) == 0, f"Old pattern would reject these valid schema file extensions: {incompatible_extensions}" + + # Report directory extensions as expected difference + if directory_extensions: + print(f"Note: {len(directory_extensions)} directory-style extensions are new BIDS features: {directory_extensions}") + + # Core extensions should be present + core_extensions = {'.nii.gz', '.nii', '.tsv', '.json'} + assert core_extensions.issubset(new_extensions), f"Missing core extensions from new config" + + def test_datatype_coverage_matches(self): + """Test that datatype patterns cover the same datatypes.""" + old_config = Config.load('bids') + new_config = Config.load('bids-schema') + + # Extract datatypes from patterns + old_datatypes = self._extract_datatypes_from_pattern(old_config.entities['datatype'].pattern) + new_datatypes = self._extract_datatypes_from_pattern(new_config.entities['datatype'].pattern) + + missing = old_datatypes - new_datatypes + extra = new_datatypes - old_datatypes + + error_msg = [] + if missing: + error_msg.append(f"Missing datatypes: {sorted(missing)}") + if extra: + error_msg.append(f"Extra datatypes: {sorted(extra)}") + + # Core datatypes should be preserved + core_datatypes = {'anat', 'func', 'dwi', 'fmap'} + assert core_datatypes.issubset(new_datatypes), f"Missing core datatypes; {'; '.join(error_msg)}" + + # New should have at least as many as old (schema grows) + assert len(new_datatypes) >= len(old_datatypes), f"Fewer datatypes than old config; {'; '.join(error_msg)}" + + def test_functional_filename_parsing_compatibility(self): + """Test that both configs can parse BIDS filenames and extract core entities.""" + old_config = Config.load('bids') + new_config = Config.load('bids-schema') + + # Test core entity extraction (skip extension due to pattern differences) + test_cases = [ + { + 'filename': '/dataset/sub-01/func/sub-01_task-rest_bold.nii.gz', + 'core_entities': {'subject': '01', 'task': 'rest', 'suffix': 'bold'} + }, + { + 'filename': '/dataset/sub-02/ses-pre/anat/sub-02_ses-pre_T1w.nii.gz', + 'core_entities': {'subject': '02', 'session': 'pre', 'suffix': 'T1w'} + } + ] + + for test_case in test_cases: + filename = test_case['filename'] + expected = test_case['core_entities'] + + old_entities = self._parse_filename_entities(filename, old_config) + new_entities = self._parse_filename_entities(filename, new_config) + + # Test that both configs extract the core BIDS entities + for key, expected_value in expected.items(): + old_extracted = old_entities.get(key) + new_extracted = new_entities.get(key) + + # Both should extract the same core information + assert old_extracted == expected_value, f"Old config failed to extract {key}={expected_value} from {filename}, got {old_extracted}" + assert new_extracted == expected_value, f"New config failed to extract {key}={expected_value} from {filename}, got {new_extracted}" + + # Test that new config has valid patterns + assert len(new_config.entities) > 20, "New config should have many entities" + assert 'subject' in new_config.entities, "New config missing subject entity" + assert 'suffix' in new_config.entities, "New config missing suffix entity" + + def test_new_config_parses_schema_compliant_filenames(self): + """Test that new config correctly parses filenames with schema entities.""" + new_config = Config.load('bids-schema') + + # Test files that use entities only available in schema (not old config) + schema_specific_cases = [ + '/dataset/sub-01/anat/sub-01_hemi-L_T1w.nii.gz', # hemisphere entity + '/dataset/sub-01/anat/sub-01_desc-preproc_T1w.nii.gz', # description entity + '/dataset/sub-01/anat/sub-01_res-native_T1w.nii.gz', # resolution entity + ] + + for filename in schema_specific_cases: + entities = self._parse_filename_entities(filename, new_config) + + # Should extract subject and suffix at minimum + assert 'subject' in entities, f"Failed to extract subject from {filename}" + assert 'suffix' in entities, f"Failed to extract suffix from {filename}" + + # Should extract the schema-specific entity + if 'hemi-' in filename: + assert 'hemisphere' in entities, f"Failed to extract hemisphere from {filename}" + if 'desc-' in filename: + assert 'description' in entities, f"Failed to extract description from {filename}" + if 'res-' in filename: + assert 'resolution' in entities, f"Failed to extract resolution from {filename}" + + def test_pattern_structure_similarity(self): + """Test that pattern structures are similar between old and new.""" + old_config = Config.load('bids') + new_config = Config.load('bids-schema') + + # Test key entities have reasonable pattern structures + key_entities = ['subject', 'session', 'task', 'run', 'suffix', 'extension'] + + for entity_name in key_entities: + if entity_name in old_config.entities and entity_name in new_config.entities: + old_pattern = old_config.entities[entity_name].pattern + new_pattern = new_config.entities[entity_name].pattern + + # Both should be non-empty + assert len(old_pattern) > 0, f"Old {entity_name} pattern is empty" + assert len(new_pattern) > 0, f"New {entity_name} pattern is empty" + + # Both should contain capturing groups + old_groups = len(re.findall(r'\([^)]*\)', old_pattern)) + new_groups = len(re.findall(r'\([^)]*\)', new_pattern)) + + assert old_groups > 0, f"Old {entity_name} pattern has no groups" + assert new_groups > 0, f"New {entity_name} pattern has no groups" + + def test_config_names_and_metadata(self): + """Test config metadata is reasonable.""" + old_config = Config.load('bids') + new_config = Config.load('bids-schema') + + # Old config should have simple name + assert old_config.name == 'bids' + + # New config should include version info + assert new_config.name.startswith('bids-schema-') + assert len(new_config.name) > len('bids-schema-') + + # Both should have reasonable entity counts + assert len(old_config.entities) > 10, "Old config has too few entities" + assert len(new_config.entities) > 10, "New config has too few entities" + + # Report entity count ratio for analysis + ratio = len(new_config.entities) / len(old_config.entities) + print(f"Entity count ratio (new/old): {ratio:.2f}") + + # Helper methods + def _extract_suffixes_from_pattern(self, pattern): + """Extract suffixes from a pattern like [_/\\]+(suffix1|suffix2|...)""" + suffixes = set() + + # Look for alternation groups + matches = re.findall(r'\(([^)]+)\)', pattern) + for match in matches: + if '|' in match: + suffixes.update(match.split('|')) + else: + suffixes.add(match) + + return suffixes + + def _extract_extensions_from_pattern(self, pattern): + """Extract extensions from a pattern.""" + extensions = set() + + # Look for patterns with dots + matches = re.findall(r'\\?\.\w+(?:\\?\.\w+)*', pattern) + for match in matches: + # Clean up regex escaping + clean_ext = match.replace('\\', '') + extensions.add(clean_ext) + + # Also look for alternation groups + alt_matches = re.findall(r'\(([^)]+)\)', pattern) + for match in alt_matches: + if '|' in match and '.' in match: + for item in match.split('|'): + if '.' in item: + clean_item = item.replace('\\', '') + extensions.add(clean_item) + + return extensions + + def _extract_datatypes_from_pattern(self, pattern): + """Extract datatypes from a pattern like [/\\]+(datatype1|datatype2|...)""" + datatypes = set() + + # Look for alternation groups + matches = re.findall(r'\(([^)]+)\)', pattern) + for match in matches: + if '|' in match: + datatypes.update(match.split('|')) + else: + datatypes.add(match) + + return datatypes + + def _parse_filename_entities(self, filename, config): + """Parse entities from filename using config patterns.""" + entities = {} + + for entity_name, entity in config.entities.items(): + if hasattr(entity, 'regex') and entity.regex: + match = entity.regex.search(filename) + if match and len(match.groups()) > 0: + entities[entity_name] = match.group(1) + + return entities + + def _map_entity_name_old_to_new(self, new_entity_name): + """Map new entity names to old entity names for compatibility testing.""" + # Old config sometimes used different names + name_mapping = { + 'subject': 'subject', + 'session': 'session', + 'task': 'task', + 'run': 'run', + 'acquisition': 'acquisition', + 'suffix': 'suffix', + 'extension': 'extension', + # Add other mappings as needed + } + return name_mapping.get(new_entity_name, new_entity_name) + + +if __name__ == '__main__': + # Allow running as script for quick testing + test_instance = TestConfigCompatibility() + test_instance.test_entity_coverage_matches() + test_instance.test_suffix_coverage_matches() + test_instance.test_filename_parsing_compatibility() diff --git a/src/bids/layout/tests/test_schema_accuracy.py b/src/bids/layout/tests/test_schema_accuracy.py new file mode 100644 index 00000000..ebfef9af --- /dev/null +++ b/src/bids/layout/tests/test_schema_accuracy.py @@ -0,0 +1,197 @@ +"""Tests to verify schema patterns match actual schema content.""" + +from bids.layout.models import Config +from bidsschematools import schema as bst_schema + + +class TestSchemaAccuracy: + """Test that patterns accurately reflect BIDS schema content.""" + + def test_extension_patterns_match_schema(self): + """Test that extension patterns match schema extensions.""" + config = Config.load('bids-schema') + + # Load schema directly + bids_schema = bst_schema.load_schema() + + # Get extension entity + extension_entity = config.entities['extension'] + + # Test that schema extensions are matched (excluding wildcards) + schema_extensions = [ext['value'] for ext in bids_schema.objects.extensions.values() + if ext.get('value') and ext['value'].startswith('.') and ext['value'] not in ('.*',)] + + # Collect all failures + matched_extensions = [] + unmatched_extensions = [] + + test_files = [f'test{ext}' for ext in schema_extensions] + for ext, test_file in zip(schema_extensions, test_files): + match = extension_entity.regex.search(test_file) + if match is not None: + matched_extensions.append(ext) + else: + unmatched_extensions.append(ext) + + # Report all differences at once + coverage = len(matched_extensions) / len(schema_extensions) * 100 if schema_extensions else 0 + + error_msg = [] + error_msg.append(f"Extension coverage: {coverage:.1f}% ({len(matched_extensions)}/{len(schema_extensions)})") + if unmatched_extensions: + error_msg.append(f"Unmatched extensions ({len(unmatched_extensions)}): {unmatched_extensions[:20]}") + if len(unmatched_extensions) > 20: + error_msg.append(f"... and {len(unmatched_extensions) - 20} more") + if matched_extensions: + error_msg.append(f"Matched extensions ({len(matched_extensions)}): {matched_extensions[:10]}") + if len(matched_extensions) > 10: + error_msg.append(f"... and {len(matched_extensions) - 10} more") + + assert len(unmatched_extensions) == 0, "; ".join(error_msg) + + def test_suffix_patterns_match_schema(self): + """Test that suffix patterns match schema suffixes.""" + config = Config.load('bids-schema') + + # Load schema directly + bids_schema = bst_schema.load_schema() + + # Get suffix entity + suffix_entity = config.entities['suffix'] + + # Test that schema suffixes are matched using actual suffix values, not keys + schema_suffix_values = [obj['value'] for obj in bids_schema.objects.suffixes.values() + if obj.get('value')] + + # Collect all failures + matched_suffixes = [] + unmatched_suffixes = [] + + # Test all suffixes using their actual values + test_files = [f'_{suffix}.nii.gz' for suffix in schema_suffix_values] + for suffix, test_file in zip(schema_suffix_values, test_files): + match = suffix_entity.regex.search(test_file) + if match is not None: + matched_suffixes.append(suffix) + else: + unmatched_suffixes.append(suffix) + + # Report all differences at once + coverage = len(matched_suffixes) / len(schema_suffix_values) * 100 if schema_suffix_values else 0 + + error_msg = [] + error_msg.append(f"Suffix coverage: {coverage:.1f}% ({len(matched_suffixes)}/{len(schema_suffix_values)})") + if unmatched_suffixes: + error_msg.append(f"Unmatched suffixes ({len(unmatched_suffixes)}): {unmatched_suffixes[:20]}") + if len(unmatched_suffixes) > 20: + error_msg.append(f"... and {len(unmatched_suffixes) - 20} more") + if matched_suffixes: + error_msg.append(f"Matched suffixes ({len(matched_suffixes)}): {matched_suffixes[:10]}") + if len(matched_suffixes) > 10: + error_msg.append(f"... and {len(matched_suffixes) - 10} more") + + assert len(unmatched_suffixes) == 0, "; ".join(error_msg) + + def test_datatype_patterns_match_schema(self): + """Test that datatype patterns match schema datatypes.""" + config = Config.load('bids-schema') + + # Load schema directly + bids_schema = bst_schema.load_schema() + + # Get datatype entity + datatype_entity = config.entities['datatype'] + + # Test that schema datatypes are matched + schema_datatypes = list(bids_schema.objects.datatypes.keys()) + + # Collect all failures + matched_datatypes = [] + unmatched_datatypes = [] + + test_paths = [f'/{dtype}/' for dtype in schema_datatypes] + for dtype, test_path in zip(schema_datatypes, test_paths): + match = datatype_entity.regex.search(test_path) + if match is not None: + matched_datatypes.append(dtype) + else: + unmatched_datatypes.append(dtype) + + # Report all differences at once + coverage = len(matched_datatypes) / len(schema_datatypes) * 100 if schema_datatypes else 0 + + error_msg = [] + error_msg.append(f"Datatype coverage: {coverage:.1f}% ({len(matched_datatypes)}/{len(schema_datatypes)})") + if unmatched_datatypes: + error_msg.append(f"Unmatched datatypes ({len(unmatched_datatypes)}): {unmatched_datatypes}") + if matched_datatypes: + error_msg.append(f"Matched datatypes ({len(matched_datatypes)}): {matched_datatypes}") + + assert len(unmatched_datatypes) == 0, "; ".join(error_msg) + + def test_entity_format_patterns_match_schema(self): + """Test that entity patterns use actual schema format patterns.""" + config = Config.load('bids-schema') + + # Load schema directly + bids_schema = bst_schema.load_schema() + + # Test a few key entities + test_entities = [ + ('subject', 'label'), + ('run', 'index'), + ('echo', 'index'), + ] + + for entity_name, expected_format in test_entities: + if entity_name in config.entities: + entity = config.entities[entity_name] + + # Get expected pattern from schema + format_obj = bids_schema.objects.formats[expected_format] + expected_pattern = format_obj['pattern'] + + # Check that our entity pattern contains the schema pattern + assert expected_pattern in entity.pattern, \ + f"Entity {entity_name} pattern doesn't contain schema format pattern" + + def test_no_hardcoded_values(self): + """Test that we don't have hardcoded schema values anymore.""" + config = Config.load('bids-schema') + + # Load schema directly to get actual counts + bids_schema = bst_schema.load_schema() + + # We should have more entities than before because we're not missing any + assert len(config.entities) >= len(bids_schema.objects.entities) + + # Extension pattern should include schema extensions + extension_entity = config.entities['extension'] + schema_extension_count = len([ext for ext in bids_schema.objects.extensions.values() + if ext.get('value') and ext['value'].startswith('.')]) + + # Pattern should include many extensions (not just a generic catch-all) + assert len(extension_entity.pattern) > 100, "Extension pattern seems too simple" + + # Suffix pattern should include schema suffixes + suffix_entity = config.entities['suffix'] + schema_suffix_count = len(bids_schema.objects.suffixes) + + # Pattern should include many suffixes + assert len(suffix_entity.pattern) > 200, "Suffix pattern seems too simple" + + print(f"Schema extensions used: {schema_extension_count}") + print(f"Schema suffixes used: {schema_suffix_count}") + print(f"Schema datatypes used: {len(bids_schema.objects.datatypes)}") + print(f"Schema entities used: {len(bids_schema.objects.entities)}") + + +if __name__ == '__main__': + # Allow running as script for quick testing + test_instance = TestSchemaAccuracy() + test_instance.test_extension_patterns_match_schema() + test_instance.test_suffix_patterns_match_schema() + test_instance.test_datatype_patterns_match_schema() + test_instance.test_entity_format_patterns_match_schema() + test_instance.test_no_hardcoded_values() + print("All schema accuracy tests passed!") \ No newline at end of file diff --git a/src/bids/layout/tests/test_schema_config.py b/src/bids/layout/tests/test_schema_config.py new file mode 100644 index 00000000..088d456d --- /dev/null +++ b/src/bids/layout/tests/test_schema_config.py @@ -0,0 +1,116 @@ +"""Tests for schema-based Config loading.""" + +import pytest + +from bids.layout.models import Config + + +class TestSchemaConfig: + """Test schema-based configuration loading.""" + + def test_load_bids_schema_basic(self): + """Test loading config from BIDS schema.""" + config = Config.load('bids-schema') + + # Basic checks + assert config.name.startswith('bids-schema-') + assert len(config.entities) > 0 + + # Check that standard entities are present (using full names per PyBIDS convention) + entity_names = {e.name for e in config.entities.values()} + expected_entities = {'subject', 'session', 'task', 'run'} + assert expected_entities.issubset(entity_names) + + # Check that PyBIDS-specific entities are present + pybids_entities = {'extension', 'suffix', 'datatype'} + assert pybids_entities.issubset(entity_names) + + def test_schema_entity_patterns(self): + """Test that entity patterns are correctly generated.""" + config = Config.load('bids-schema') + + # Test subject entity (directory-based) + if 'subject' in config.entities: + subject_entity = config.entities['subject'] + assert subject_entity.regex is not None + # Should match /sub-01/ or similar + match = subject_entity.regex.search('/sub-01/') + assert match is not None + assert match.group(1) == '01' + + # Test task entity (file-based) + if 'task' in config.entities: + task_entity = config.entities['task'] + assert task_entity.regex is not None + # Should match _task-rest_ or similar + match = task_entity.regex.search('_task-rest_') + assert match is not None + assert match.group(1) == 'rest' + + def test_schema_version_tracking(self): + """Test that schema version is tracked in config name.""" + config = Config.load('bids-schema') + + # Config name should include version + assert 'bids-schema-' in config.name + # Should have some version number after the dash + version_part = config.name.split('bids-schema-')[1] + assert len(version_part) > 0 + + def test_schema_version_parameter(self): + """Test loading specific schema version (future feature).""" + # This should work but use the default schema for now + config = Config.load({'schema_version': '1.9.0'}) + + assert config.name.startswith('bids-schema-') + assert len(config.entities) > 0 + + + def test_entity_generation(self): + """Test entity pattern generation using real schema.""" + config = Config.load('bids-schema') + + # Test that entities were generated + assert len(config.entities) > 0 + + # Test specific entity properties we care about (using full names) + assert 'subject' in config.entities + assert 'task' in config.entities + assert 'run' in config.entities + + # Test that patterns are valid regex + for entity_name, entity in config.entities.items(): + assert entity.regex is not None, f"Entity {entity_name} missing regex" + # Test that regex compiles and works + assert hasattr(entity.regex, 'search'), f"Entity {entity_name} regex invalid" + + # Test specific entity patterns work correctly + subject_entity = config.entities['subject'] + assert 'sub-' in subject_entity.pattern + + # Test subject pattern matches expected format + match = subject_entity.regex.search('/sub-01/') + assert match is not None + assert match.group(1) == '01' + + # Test task pattern matches expected format + task_entity = config.entities['task'] + match = task_entity.regex.search('_task-rest_') + assert match is not None + assert match.group(1) == 'rest' + + # Test run pattern matches expected format + run_entity = config.entities['run'] + match = run_entity.regex.search('_run-02_') + assert match is not None + assert match.group(1) == '02' + + + +if __name__ == '__main__': + # Allow running as script for quick testing + test_instance = TestSchemaConfig() + test_instance.test_load_bids_schema_basic() + test_instance.test_schema_entity_patterns() + test_instance.test_schema_version_tracking() + print("Basic tests passed!") \ No newline at end of file From 8aa069c8711fe5d13e805d1e54a935d43205c597 Mon Sep 17 00:00:00 2001 From: Ashley Stewart Date: Tue, 9 Sep 2025 16:03:09 +1000 Subject: [PATCH 02/10] Support specifying bids schema version --- pyproject.toml | 2 +- src/bids/layout/layout.py | 3 +- src/bids/layout/models.py | 36 ++++++- .../layout/tests/test_config_compatibility.py | 62 ----------- src/bids/layout/tests/test_schema_config.py | 8 +- .../tests/test_schema_version_differences.py | 100 ++++++++++++++++++ 6 files changed, 142 insertions(+), 69 deletions(-) create mode 100644 src/bids/layout/tests/test_schema_version_differences.py diff --git a/pyproject.toml b/pyproject.toml index 18aad135..962c4634 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,7 @@ dependencies = [ "formulaic >=0.3", "sqlalchemy >=1.4.31", "bids-validator>=1.14.7", # Keep up-to-date to ensure support for recent modalities - "bidsschematools>=0.10", # Required for schema-based configs + "bidsschematools>=1.1.0", # Required for schema-based configs "num2words >=0.5.10", "click >=8.0", "universal_pathlib >=0.2.2", diff --git a/src/bids/layout/layout.py b/src/bids/layout/layout.py index afa2c399..73b5b1e8 100644 --- a/src/bids/layout/layout.py +++ b/src/bids/layout/layout.py @@ -76,7 +76,8 @@ class BIDSLayout: file in order to be indexed. config : str or list or None, optional Optional name(s) of configuration file(s) to use. - By default (None), uses 'bids'. + By default (None), uses 'bids'. Can also be 'bids-schema' to use + the official BIDS schema instead of JSON config files. sources : :obj:`bids.layout.BIDSLayout` or list or None, optional Optional BIDSLayout(s) from which the current BIDSLayout is derived. config_filename : str diff --git a/src/bids/layout/models.py b/src/bids/layout/models.py index 48f5a285..d2d1a89c 100644 --- a/src/bids/layout/models.py +++ b/src/bids/layout/models.py @@ -165,7 +165,8 @@ def load(cls, config, session=None): * A path to a JSON file containing config information * A dictionary containing config information * 'bids-schema' to load from the official BIDS schema - * dict with 'schema_version' key to load specific schema version + * dict with 'schema_version' key to load specific BIDS version + (e.g., {'schema_version': '1.9.0'}) session : :obj:`sqlalchemy.orm.session.Session` or None An optional SQLAlchemy Session instance. If passed, the session is used to check the database for (and @@ -206,10 +207,39 @@ def load(cls, config, session=None): @classmethod def _from_schema(cls, schema_version=None, session=None): - """Load config from BIDS schema - store patterns directly.""" + """Load config from BIDS schema. + + Parameters + ---------- + schema_version : str, optional + Specific BIDS specification version to load (e.g., "1.9.0", "1.8.0"). + If None, uses the schema version bundled with bidsschematools. + session : :obj:`sqlalchemy.orm.session.Session` or None + An optional SQLAlchemy Session instance. + + Returns + ------- + Config + A Config instance populated with entities and patterns + extracted from the BIDS schema. + """ from bidsschematools import rules, schema as bidsschema + from upath import UPath - bids_schema = bidsschema.load_schema() + # Load schema - either default or specific version + if schema_version is None: + bids_schema = bidsschema.load_schema() + else: + # Load specific schema version from versioned URL + schema_url = UPath(f"https://bids-specification.readthedocs.io/en/v{schema_version}/schema.json") + try: + bids_schema = bidsschema.load_schema(schema_url) + except Exception as e: + raise ValueError( + f"Failed to load BIDS schema version {schema_version}. " + f"Check that the version exists at {schema_url}. " + f"Error: {e}" + ) # Collect ALL patterns from bidsschematools (keep existing collection logic) all_patterns = [] diff --git a/src/bids/layout/tests/test_config_compatibility.py b/src/bids/layout/tests/test_config_compatibility.py index b9904f04..f47b3d4a 100644 --- a/src/bids/layout/tests/test_config_compatibility.py +++ b/src/bids/layout/tests/test_config_compatibility.py @@ -45,37 +45,6 @@ def test_schema_entities_are_superset_of_valid_old_entities(self): assert len(missing_from_new) == 0, f"Schema missing valid entities: {missing_from_new}" - def test_suffix_coverage_matches(self): - """Test that suffix patterns cover the same suffixes.""" - #pytest.skip("Old config uses generic catch-all pattern while new config uses explicit schema suffixes. " - # "Cannot meaningfully compare pattern extraction. See test_new_suffixes_compatible_with_old_pattern instead.") - - # Original implementation preserved for reference - old_config = Config.load('bids') - new_config = Config.load('bids-schema') - - # Extract suffixes from patterns - old_suffixes = self._extract_suffixes_from_pattern(old_config.entities['suffix'].pattern) - new_suffixes = self._extract_suffixes_from_pattern(new_config.entities['suffix'].pattern) - - missing = old_suffixes - new_suffixes - extra = new_suffixes - old_suffixes - - error_msg = [] - if missing: - error_msg.append(f"Missing suffixes: {sorted(missing)}") - if extra: - error_msg.append(f"Extra suffixes: {sorted(extra)}") - - # Core BIDS suffixes should be preserved - core_suffixes = {'bold', 'T1w', 'T2w', 'dwi', 'events'} - assert core_suffixes.issubset(new_suffixes), f"Missing core suffixes; {'; '.join(error_msg)}" - - # Report coverage for analysis - coverage = len(old_suffixes & new_suffixes) / len(old_suffixes) if old_suffixes else 0 - if coverage < 1.0 or missing or extra: - pytest.fail(f"Suffix coverage: {coverage:.1%}; {'; '.join(error_msg)}") - def test_new_suffixes_compatible_with_old_pattern(self): """Test that all new schema suffixes would be accepted by old generic pattern.""" old_config = Config.load('bids') @@ -94,37 +63,6 @@ def test_new_suffixes_compatible_with_old_pattern(self): assert len(incompatible_suffixes) == 0, f"Old pattern would reject these valid schema suffixes: {incompatible_suffixes}" - def test_extension_coverage_matches(self): - """Test that extension patterns cover the same extensions.""" - #pytest.skip("Old config uses generic catch-all pattern while new config uses explicit schema extensions. " - # "Cannot meaningfully compare pattern extraction. See test_new_extensions_compatible_with_old_pattern instead.") - - # Original implementation preserved for reference - old_config = Config.load('bids') - new_config = Config.load('bids-schema') - - # Extract extensions from patterns - old_extensions = self._extract_extensions_from_pattern(old_config.entities['extension'].pattern) - new_extensions = self._extract_extensions_from_pattern(new_config.entities['extension'].pattern) - - missing = old_extensions - new_extensions - extra = new_extensions - old_extensions - - error_msg = [] - if missing: - error_msg.append(f"Missing extensions: {sorted(missing)}") - if extra: - error_msg.append(f"Extra extensions: {sorted(extra)}") - - # Core extensions should be preserved - core_extensions = {'.nii.gz', '.nii', '.tsv', '.json'} - assert core_extensions.issubset(new_extensions), f"Missing core extensions; {'; '.join(error_msg)}" - - # Report coverage for analysis - coverage = len(old_extensions & new_extensions) / len(old_extensions) if old_extensions else 0 - if coverage < 1.0 or missing or extra: - pytest.fail(f"Extension coverage: {coverage:.1%}; {'; '.join(error_msg)}") - def test_new_extensions_compatible_with_old_pattern(self): """Test that all new schema extensions would be accepted by old generic pattern.""" old_config = Config.load('bids') diff --git a/src/bids/layout/tests/test_schema_config.py b/src/bids/layout/tests/test_schema_config.py index 088d456d..21abd249 100644 --- a/src/bids/layout/tests/test_schema_config.py +++ b/src/bids/layout/tests/test_schema_config.py @@ -58,12 +58,15 @@ def test_schema_version_tracking(self): assert len(version_part) > 0 def test_schema_version_parameter(self): - """Test loading specific schema version (future feature).""" - # This should work but use the default schema for now + """Test loading specific schema version.""" + # Test loading BIDS v1.9.0 schema config = Config.load({'schema_version': '1.9.0'}) assert config.name.startswith('bids-schema-') assert len(config.entities) > 0 + # Should include the version in the name + assert '1.9.0' in config.name + def test_entity_generation(self): @@ -113,4 +116,5 @@ def test_entity_generation(self): test_instance.test_load_bids_schema_basic() test_instance.test_schema_entity_patterns() test_instance.test_schema_version_tracking() + test_instance.test_schema_version_parameter() print("Basic tests passed!") \ No newline at end of file diff --git a/src/bids/layout/tests/test_schema_version_differences.py b/src/bids/layout/tests/test_schema_version_differences.py new file mode 100644 index 00000000..37da8cf7 --- /dev/null +++ b/src/bids/layout/tests/test_schema_version_differences.py @@ -0,0 +1,100 @@ +"""Tests for differences between BIDS schema versions.""" + +import pytest +import tempfile +import json +import os +from pathlib import Path + +from bids import BIDSLayout +from bids.layout.models import Config + + +class TestSchemaVersionDifferences: + """Test that different schema versions handle different features correctly.""" + + def test_entity_parsing_version_differences(self): + """Test that tracksys entity (added in v1.9.0) works in newer schemas but fails in v1.8.0.""" + + with tempfile.TemporaryDirectory() as temp_dir: + dataset_dir = Path(temp_dir) / "test_dataset" + dataset_dir.mkdir() + + # Create minimal dataset with motion file using tracksys entity + with open(dataset_dir / "dataset_description.json", "w") as f: + json.dump({"Name": "Test", "BIDSVersion": "1.10.1", "Authors": ["Test"]}, f) + + motion_dir = dataset_dir / "sub-01" / "motion" + motion_dir.mkdir(parents=True) + (motion_dir / "sub-01_tracksys-imu_motion.tsv").touch() + + # Test tracksys query across schema versions + def test_tracksys_query(config): + layout = BIDSLayout(dataset_dir, config=[config]) + try: + return len(layout.get(tracksys='imu', return_type='filename')) > 0 + except Exception: + return False + + # tracksys entity was added in v1.9.0 for motion datatype + assert test_tracksys_query('bids-schema'), "Current schema should support tracksys" + assert test_tracksys_query({'schema_version': '1.9.0'}), "v1.9.0 should support tracksys (motion added)" + assert not test_tracksys_query({'schema_version': '1.8.0'}), "v1.8.0 should NOT support tracksys" + + + def test_motion_datatype_evolution(self): + """Test that motion datatype (BEP029) support was added in v1.9.0.""" + + # Motion datatype was added in v1.9.0 according to changelog: + # v1.9.0: [ENH] Extend BIDS for Motion data (BEP029) #981 + + config_v190 = Config.load({'schema_version': '1.9.0'}) + config_v180 = Config.load({'schema_version': '1.8.0'}) + + entities_v190 = {e.name for e in config_v190.entities.values()} + entities_v180 = {e.name for e in config_v180.entities.values()} + + # Motion-specific entity 'tracksys' should be in v1.9.0+ but not v1.8.0 + assert 'tracksys' in entities_v190, "tracksys entity should exist in v1.9.0 (motion datatype was added)" + assert 'tracksys' not in entities_v180, "tracksys entity should NOT exist in v1.8.0 (before motion datatype)" + + print(f"✓ Motion datatype evolution verified:") + print(f" v1.8.0 (before motion): tracksys = {'tracksys' in entities_v180}") + print(f" v1.9.0 (motion added): tracksys = {'tracksys' in entities_v190}") + + # Test specific motion-related entities that were added + motion_entities = {'tracksys'} # Could expand this list as more motion entities are identified + + for entity in motion_entities: + assert entity in entities_v190, f"Motion entity '{entity}' should exist in v1.9.0+" + assert entity not in entities_v180, f"Motion entity '{entity}' should NOT exist in v1.8.0" + + def test_schema_version_metadata_differences(self): + """Test that schema versions have different BIDS version numbers.""" + + # Load different schema versions + config_current = Config.load('bids-schema') + config_v190 = Config.load({'schema_version': '1.9.0'}) + config_v180 = Config.load({'schema_version': '1.8.0'}) + + # Check that config names reflect the versions + assert 'bids-schema-' in config_current.name + assert '1.9.0' in config_v190.name + assert '1.8.0' in config_v180.name + + print(f"Current config: {config_current.name}") + print(f"v1.9.0 config: {config_v190.name}") + print(f"v1.8.0 config: {config_v180.name}") + + # Verify they're different configurations + assert config_current.name != config_v190.name + assert config_v190.name != config_v180.name + + +if __name__ == '__main__': + # Allow running as script for quick testing + test_instance = TestSchemaVersionDifferences() + test_instance.test_schema_version_entity_differences() + test_instance.test_motion_datatype_evolution() + test_instance.test_schema_version_metadata_differences() + print("Schema version difference tests completed!") \ No newline at end of file From b051698f18311c968e097d4899f7d7bfe8a956ce Mon Sep 17 00:00:00 2001 From: Ashley Stewart Date: Tue, 9 Sep 2025 16:12:58 +1000 Subject: [PATCH 03/10] update names --- ...{test_schema_accuracy.py => test_schema_pattern_validation.py} | 0 ...test_config_compatibility.py => test_schema_vs_json_config.py} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename src/bids/layout/tests/{test_schema_accuracy.py => test_schema_pattern_validation.py} (100%) rename src/bids/layout/tests/{test_config_compatibility.py => test_schema_vs_json_config.py} (100%) diff --git a/src/bids/layout/tests/test_schema_accuracy.py b/src/bids/layout/tests/test_schema_pattern_validation.py similarity index 100% rename from src/bids/layout/tests/test_schema_accuracy.py rename to src/bids/layout/tests/test_schema_pattern_validation.py diff --git a/src/bids/layout/tests/test_config_compatibility.py b/src/bids/layout/tests/test_schema_vs_json_config.py similarity index 100% rename from src/bids/layout/tests/test_config_compatibility.py rename to src/bids/layout/tests/test_schema_vs_json_config.py From fa01063242cc7d614a4c5ed37dade64a3508938d Mon Sep 17 00:00:00 2001 From: Ashley Stewart Date: Tue, 9 Sep 2025 16:40:39 +1000 Subject: [PATCH 04/10] Remove problematic regex parsing code --- src/bids/layout/models.py | 187 +++++++++----------------------------- 1 file changed, 42 insertions(+), 145 deletions(-) diff --git a/src/bids/layout/models.py b/src/bids/layout/models.py index d2d1a89c..272732d8 100644 --- a/src/bids/layout/models.py +++ b/src/bids/layout/models.py @@ -305,7 +305,7 @@ def _extract_entity_names_from_rules(cls, bids_schema): @classmethod def _create_entities_from_schema(cls, entity_names, entity_values, bids_schema): - """Create Entity objects from schema information.""" + """Create Entity objects directly from schema information.""" entities = [] for entity_name in sorted(entity_names): @@ -316,11 +316,11 @@ def _create_entities_from_schema(cls, entity_names, entity_values, bids_schema): special_entities = {'suffix', 'extension', 'datatype'} if entity_name in special_entities: - # These entities don't have prefixes - they appear directly + # These entities use explicit value lists from schema rules if entity_name in entity_values and entity_values[entity_name]: # Filter out problematic values for extension entity if entity_name == 'extension': - # Remove empty string and wildcard patterns that would match everything + # Remove empty string and wildcard patterns filtered_values = {v for v in entity_values[entity_name] if v and v not in ('.*', '**', '*')} values = '|'.join(sorted(filtered_values)) @@ -333,20 +333,31 @@ def _create_entities_from_schema(cls, entity_names, entity_values, bids_schema): pattern = fr'/({values})/' # Datatypes are directory names else: # suffix pattern = fr'[_/\\\\]({values})\.?' # Before extension, with optional dot + else: + # Fallback for special entities without values + if entity_name == 'extension': + pattern = fr'(\.[^/\\\\]+)\Z' + elif entity_name == 'datatype': + pattern = fr'/([a-z]+)/' + else: # suffix + pattern = fr'[_/\\\\]([a-zA-Z0-9]+)\.?' else: - # Regular entities - use schema format patterns directly! + # Regular BIDS entities - use schema format patterns directly bids_prefix = entity_spec.get('name', entity_name) format_type = entity_spec.get('format', 'label') # Get the official value pattern from schema formats + value_pattern = '[0-9a-zA-Z]+' # Default pattern if format_type in bids_schema.objects.formats: format_obj = dict(bids_schema.objects.formats[format_type]) - value_pattern = format_obj['pattern'] # [0-9a-zA-Z]+, [0-9]+, etc. + value_pattern = format_obj.get('pattern', value_pattern) - # Combine prefix + schema format pattern + # Determine separator based on entity type if entity_name in ['subject', 'session']: + # Directory-level entities pattern = fr'[/\\\\]+{bids_prefix}-({value_pattern})' else: + # File-level entities pattern = fr'[_/\\\\]+{bids_prefix}-({value_pattern})' entity_config = { @@ -392,110 +403,36 @@ def _extract_entity_values_from_rules(cls, bids_schema, entity_names): - @staticmethod - def _extract_separator_from_context(entity_name, before_context, after_context, full_pattern, schema): - """Extract separator pattern from schema context. + @staticmethod + def _get_entity_separator_from_schema(entity_name, bids_schema): + """Get entity separator pattern directly from schema definition. - Analyzes the actual schema pattern context to determine what - separators are used for this entity by examining the schema regex - and entity classification. + Uses schema entity definitions and directory rules to determine + the correct separator pattern without regex parsing. """ - # Find the entity in the full pattern and extract its actual context - entity_match = re.search(rf'(\S*?)(\(\?P<{entity_name}>[^)]+\))', full_pattern) - if not entity_match: - return f"[_/\\\\]+{entity_name}-" # Fallback - - prefix_context = entity_match.group(1) - - # Determine entity type from schema - is it a real entity or special construct? - is_real_entity = entity_name in schema.objects.entities - - if not is_real_entity: - # This is a schema construct (extension/suffix/datatype capturing group) - # Analyze the pattern context to determine positioning rules - - # Analyze from the actual pattern context - if '/' in before_context and '/' in after_context: - # Appears between directory separators (like datatype) - return "[/\\\\]+" - elif before_context.endswith(')'): - # Comes directly after another capturing group (like extension after suffix) - return "" - elif '_)?' in before_context or 'chunk)_)?' in before_context: - # Comes after underscore separators from entities (like suffix) - return "[_/\\\\]+" - else: - # Analyze position in full pattern to determine context - entity_pos = full_pattern.find(f'(?P<{entity_name}>') - pattern_before = full_pattern[:entity_pos] - - # Count directory structure elements before this construct - slash_count = pattern_before.count('/') - - if slash_count <= 2: - # In directory part - use slash separators - return "[/\\\\]+" - else: - # In filename part - analyze further - if ')(?P<' in pattern_before[-20:]: - # Directly after another construct - return "" - else: - # After separator - return "[_/\\\\]+" - - # Extract the actual prefix used in the schema pattern - # Look for patterns like 'sub-(?P' or '_task-(?P' - prefix_pattern = re.search(rf'([/_]?)([a-zA-Z]+)-\(\?P<{entity_name}>', full_pattern) - if prefix_pattern: - separator_char = prefix_pattern.group(1) - prefix = prefix_pattern.group(2) - - if separator_char == '/': - return f"[/\\\\]+{prefix}-" - elif separator_char == '_': - return f"[_/\\\\]+{prefix}-" - else: # Direct prefix (no separator before) - # Check position to determine separator type - entity_pos = full_pattern.find(f'(?P<{entity_name}>') - pattern_before = full_pattern[:entity_pos] - slash_count = pattern_before.count('/') - - if slash_count <= 2: # Directory part - return f"[/\\\\]+{prefix}-" - else: # Filename part - return f"[_/\\\\]+{prefix}-" + # Handle special entities that don't have BIDS prefixes + special_entities = {'suffix', 'extension', 'datatype'} + if entity_name in special_entities: + if entity_name == 'extension': + return '' # Extensions appear at end with no separator + elif entity_name == 'datatype': + return '/' # Datatypes are directory names + else: # suffix + return '[_/\\\\]+' # Suffixes have underscore separators - # Try to extract any entity prefix from before_context - prefix_match = re.search(r'([a-zA-Z]+)-$', before_context) - if prefix_match: - prefix = prefix_match.group(1) - # Verify this is a real entity prefix from schema - for entity in schema.objects.entities.values(): - if entity.get('name') == prefix: - if '/' in before_context: - return f"[/\\\\]+{prefix}-" - else: - return f"[_/\\\\]+{prefix}-" - - # For entities with prefixes, extract from schema pattern - broad_match = re.search(rf'([a-zA-Z]+)-\(\?P<{entity_name}>', full_pattern) - if broad_match: - prefix = broad_match.group(1) - - # Determine separator type from position in pattern - entity_pos = full_pattern.find(f'(?P<{entity_name}>') - pattern_before_entity = full_pattern[:entity_pos] - slash_count = pattern_before_entity.count('/') + # Regular BIDS entities - get info from schema + if entity_name not in bids_schema.objects.entities: + return f'[_/\\\\]+{entity_name}-' # Fallback - if slash_count <= 2: # Directory part - return f"[/\\\\]+{prefix}-" - else: # Filename part - return f"[_/\\\\]+{prefix}-" + entity_spec = bids_schema.objects.entities[entity_name] + bids_prefix = entity_spec.get('name', entity_name) - # If no prefix pattern found, this entity shouldn't have a prefix - # This handles cases where entities appear directly without prefix- - return f"[_/\\\\]+{entity_name}-" # Final fallback + # Directory-level entities (subject, session) use slash separators + if entity_name in ['subject', 'session']: + return f'[/\\\\]+{bids_prefix}-' + else: + # File-level entities use underscore separators + return f'[_/\\\\]+{bids_prefix}-' @staticmethod def _extract_directory_config(entity_name, pattern, schema): @@ -541,46 +478,6 @@ def _extract_directory_config(entity_name, pattern, schema): return None - @staticmethod - def _entity_appears_in_all_modalities(entity_name, schema): - """Check if an entity appears in ALL modalities in the schema. - - Entities that appear in all modalities (like suffix, extension) should have - their values combined across all modalities to ensure complete coverage. - - Parameters - ---------- - entity_name : str - The entity name to check - schema : object - The BIDS schema object - - Returns - ------- - bool - True if entity appears in all modalities, False otherwise - """ - # Get all modalities - all_modalities = list(schema.rules.files.raw.keys()) - if not all_modalities: - return False - - modalities_with_entity = set() - - # Check each modality for this entity - for modality in all_modalities: - datatype_rules = getattr(schema.rules.files.raw, modality) - patterns = rules.regexify_filename_rules(datatype_rules, schema, level=1) - - # Check if entity appears in any pattern for this modality - for pattern in patterns: - entities_in_pattern = re.findall(r'\(\?P<([^>]+)>', pattern['regex']) - if entity_name in entities_in_pattern: - modalities_with_entity.add(modality) - break # Found in this modality, move to next - - # Return True if entity appears in ALL modalities - return len(modalities_with_entity) == len(all_modalities) From fd07a267cfee5db6abd5b580ef46709359d7b0a6 Mon Sep 17 00:00:00 2001 From: Ashley Stewart Date: Tue, 9 Sep 2025 16:44:20 +1000 Subject: [PATCH 05/10] Remove unnecessary fallbacks --- src/bids/layout/layout.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/bids/layout/layout.py b/src/bids/layout/layout.py index 73b5b1e8..d447bce2 100644 --- a/src/bids/layout/layout.py +++ b/src/bids/layout/layout.py @@ -422,9 +422,6 @@ def _find_directories_for_entity_using_schema(self, entity_name, results): # Load schema directory rules bids_schema = bst_schema.load_schema() - if not hasattr(bids_schema.rules, 'directories') or not hasattr(bids_schema.rules.directories, 'raw'): - # Fallback to simple approach if no schema rules - return [f.dirname for f in results if entity_name in f.entities] directory_rules = bids_schema.rules.directories.raw directories = set() @@ -438,9 +435,6 @@ def _find_directories_for_entity_using_schema(self, entity_name, results): # Convert to full directory path directory_path = os.path.join(self.root, *path_components) directories.add(directory_path) - else: - # Fallback to actual path if schema reconstruction fails - directories.add(f.dirname) return list(directories) From 14098637360c2145c2e352fbe36604f21eeb2845 Mon Sep 17 00:00:00 2001 From: Ashley Stewart Date: Tue, 9 Sep 2025 16:50:32 +1000 Subject: [PATCH 06/10] Remove unnecessary fallback --- src/bids/layout/models.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/src/bids/layout/models.py b/src/bids/layout/models.py index 272732d8..21636631 100644 --- a/src/bids/layout/models.py +++ b/src/bids/layout/models.py @@ -264,7 +264,7 @@ def _from_schema(cls, schema_version=None, session=None): # Store patterns directly - NO PARSING EVER config_name = f'bids-schema-{bids_schema.bids_version}' - # Extract entities directly from schema rules - no regex parsing! + # Extract entities directly from schema rules entity_names = cls._extract_entity_names_from_rules(bids_schema) # Extract actual values for key entities from schema rules directly @@ -279,7 +279,7 @@ def _from_schema(cls, schema_version=None, session=None): @classmethod def _extract_entity_names_from_rules(cls, bids_schema): - """Extract entity names directly from schema rules - no regex parsing!""" + """Extract entity names directly from schema rules""" # Get entity names from all rule sections entity_names = set() @@ -333,14 +333,6 @@ def _create_entities_from_schema(cls, entity_names, entity_values, bids_schema): pattern = fr'/({values})/' # Datatypes are directory names else: # suffix pattern = fr'[_/\\\\]({values})\.?' # Before extension, with optional dot - else: - # Fallback for special entities without values - if entity_name == 'extension': - pattern = fr'(\.[^/\\\\]+)\Z' - elif entity_name == 'datatype': - pattern = fr'/([a-z]+)/' - else: # suffix - pattern = fr'[_/\\\\]([a-zA-Z0-9]+)\.?' else: # Regular BIDS entities - use schema format patterns directly bids_prefix = entity_spec.get('name', entity_name) @@ -372,7 +364,7 @@ def _create_entities_from_schema(cls, entity_names, entity_values, bids_schema): @classmethod def _extract_entity_values_from_rules(cls, bids_schema, entity_names): - """Extract entity values directly from schema rules - no regex parsing!""" + """Extract entity values directly from schema rules""" entity_values = {name: set() for name in entity_names} file_sections = { @@ -408,7 +400,7 @@ def _get_entity_separator_from_schema(entity_name, bids_schema): """Get entity separator pattern directly from schema definition. Uses schema entity definitions and directory rules to determine - the correct separator pattern without regex parsing. + the correct separator pattern. """ # Handle special entities that don't have BIDS prefixes special_entities = {'suffix', 'extension', 'datatype'} From d44bb7b875c0bc190e899ca4c4010f740e8a8040 Mon Sep 17 00:00:00 2001 From: "Christopher J. Markiewicz" Date: Tue, 23 Sep 2025 13:06:45 -0400 Subject: [PATCH 07/10] [DATALAD RUNCMD] uv lock === Do not change lines below === { "chain": [], "cmd": "uv lock", "exit": 0, "extra_inputs": [], "inputs": [ "pyproject.toml" ], "outputs": [ "uv.lock" ], "pwd": "." } ^^^ Do not change lines above ^^^ --- uv.lock | 2 ++ 1 file changed, 2 insertions(+) diff --git a/uv.lock b/uv.lock index 3e75fc38..f1d80117 100644 --- a/uv.lock +++ b/uv.lock @@ -2289,6 +2289,7 @@ name = "pybids" source = { editable = "." } dependencies = [ { name = "bids-validator" }, + { name = "bidsschematools" }, { name = "click", version = "8.1.8", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "click", version = "8.3.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, { name = "formulaic" }, @@ -2389,6 +2390,7 @@ requires-dist = [ { name = "altair", marker = "extra == 'model-reports'" }, { name = "altair", marker = "extra == 'test'", specifier = ">=5" }, { name = "bids-validator", specifier = ">=1.14.7" }, + { name = "bidsschematools", specifier = ">=1.1.0" }, { name = "bsmschema", marker = "extra == 'test'", specifier = ">=0.1" }, { name = "click", specifier = ">=8.0" }, { name = "coverage", extras = ["toml"], marker = "extra == 'test'", specifier = ">=5.2.1" }, From 71c003b3f6d4609afe57d1ab2f5e1d4700bb8e27 Mon Sep 17 00:00:00 2001 From: Ashley Stewart Date: Thu, 30 Oct 2025 09:46:46 +1000 Subject: [PATCH 08/10] Remove unused functions and add tests --- src/bids/layout/models.py | 77 --------------------- src/bids/layout/tests/test_schema_config.py | 49 ++++++++++--- 2 files changed, 38 insertions(+), 88 deletions(-) diff --git a/src/bids/layout/models.py b/src/bids/layout/models.py index 21636631..49ad1098 100644 --- a/src/bids/layout/models.py +++ b/src/bids/layout/models.py @@ -392,83 +392,6 @@ def _extract_entity_values_from_rules(cls, bids_schema, entity_names): # They use generic patterns like [0-9a-zA-Z]+ return entity_values - - - - @staticmethod - def _get_entity_separator_from_schema(entity_name, bids_schema): - """Get entity separator pattern directly from schema definition. - - Uses schema entity definitions and directory rules to determine - the correct separator pattern. - """ - # Handle special entities that don't have BIDS prefixes - special_entities = {'suffix', 'extension', 'datatype'} - if entity_name in special_entities: - if entity_name == 'extension': - return '' # Extensions appear at end with no separator - elif entity_name == 'datatype': - return '/' # Datatypes are directory names - else: # suffix - return '[_/\\\\]+' # Suffixes have underscore separators - - # Regular BIDS entities - get info from schema - if entity_name not in bids_schema.objects.entities: - return f'[_/\\\\]+{entity_name}-' # Fallback - - entity_spec = bids_schema.objects.entities[entity_name] - bids_prefix = entity_spec.get('name', entity_name) - - # Directory-level entities (subject, session) use slash separators - if entity_name in ['subject', 'session']: - return f'[/\\\\]+{bids_prefix}-' - else: - # File-level entities use underscore separators - return f'[_/\\\\]+{bids_prefix}-' - - @staticmethod - def _extract_directory_config(entity_name, pattern, schema): - """Extract directory configuration from schema directory rules.""" - - # Get directory rules from schema - if not hasattr(schema.rules, 'directories') or not hasattr(schema.rules.directories, 'raw'): - return None - - dir_rules = schema.rules.directories.raw - - # Check if this entity has a directory rule - for rule_name, rule in dir_rules.items(): - rule_dict = dict(rule) - if rule_dict.get('entity') == entity_name: - # This entity creates directories - - # Build directory template based on schema hierarchy - # Find parent entities by checking what directories can contain this entity - parent_entities = [] - - # Check all directory rules to see which ones list this entity in subdirs - for parent_rule_name, parent_rule in dir_rules.items(): - parent_rule_dict = dict(parent_rule) - subdirs = parent_rule_dict.get('subdirs', []) - - # Check if this entity is in parent's subdirs - for subdir in subdirs: - if isinstance(subdir, dict) and 'oneOf' in subdir: - if entity_name in subdir['oneOf']: - if parent_rule_dict.get('entity'): - parent_entities.append(parent_rule_dict['entity']) - elif subdir == entity_name: - if parent_rule_dict.get('entity'): - parent_entities.append(parent_rule_dict['entity']) - - # Build template with parent hierarchy - template_parts = [] - template_parts.extend([f'{{{parent}}}' for parent in parent_entities]) - template_parts.append(f'{{{entity_name}}}') - - return ''.join(template_parts) - - return None diff --git a/src/bids/layout/tests/test_schema_config.py b/src/bids/layout/tests/test_schema_config.py index 21abd249..b8165276 100644 --- a/src/bids/layout/tests/test_schema_config.py +++ b/src/bids/layout/tests/test_schema_config.py @@ -61,53 +61,80 @@ def test_schema_version_parameter(self): """Test loading specific schema version.""" # Test loading BIDS v1.9.0 schema config = Config.load({'schema_version': '1.9.0'}) - + assert config.name.startswith('bids-schema-') assert len(config.entities) > 0 # Should include the version in the name assert '1.9.0' in config.name - + + def test_invalid_schema_version(self): + """Test that loading invalid schema version raises appropriate error.""" + # Test with a non-existent version + with pytest.raises(ValueError, match="Failed to load BIDS schema version"): + Config.load({'schema_version': '99.99.99'}) + def test_entity_generation(self): """Test entity pattern generation using real schema.""" config = Config.load('bids-schema') - + # Test that entities were generated assert len(config.entities) > 0 - + # Test specific entity properties we care about (using full names) assert 'subject' in config.entities assert 'task' in config.entities assert 'run' in config.entities - + # Test that patterns are valid regex for entity_name, entity in config.entities.items(): assert entity.regex is not None, f"Entity {entity_name} missing regex" # Test that regex compiles and works assert hasattr(entity.regex, 'search'), f"Entity {entity_name} regex invalid" - + # Test specific entity patterns work correctly subject_entity = config.entities['subject'] assert 'sub-' in subject_entity.pattern - + # Test subject pattern matches expected format match = subject_entity.regex.search('/sub-01/') assert match is not None assert match.group(1) == '01' - - # Test task pattern matches expected format + + # Test task pattern matches expected format task_entity = config.entities['task'] match = task_entity.regex.search('_task-rest_') assert match is not None assert match.group(1) == 'rest' - + # Test run pattern matches expected format run_entity = config.entities['run'] match = run_entity.regex.search('_run-02_') assert match is not None assert match.group(1) == '02' - + + def test_schema_directory_query(self): + """Test directory queries with schema-based config.""" + from bids import BIDSLayout + from pathlib import Path + import os + + # Use the 7t_trt test dataset + data_dir = Path(__file__).parent.parent.parent.parent.parent / 'tests' / 'data' / '7t_trt' + if not data_dir.exists(): + pytest.skip("Test dataset not available") + + # Create layout with schema config + layout = BIDSLayout(data_dir, config='bids-schema', derivatives=False) + + # Test directory query for subjects + subject_dirs = layout.get(target='subject', return_type='dir') + assert len(subject_dirs) > 0 + assert all(os.path.exists(d) for d in subject_dirs) + # Should include subject directories + assert any('sub-' in os.path.basename(d) for d in subject_dirs) + if __name__ == '__main__': From e2f8d9b979d32cd5d10b844a92eb6e7781acbb4e Mon Sep 17 00:00:00 2001 From: Ashley Stewart Date: Thu, 30 Oct 2025 10:50:57 +1000 Subject: [PATCH 09/10] Add tests to improve coverage --- src/bids/layout/layout.py | 10 +++--- src/bids/layout/tests/test_layout.py | 54 ++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 4 deletions(-) diff --git a/src/bids/layout/layout.py b/src/bids/layout/layout.py index d447bce2..3ffad96b 100644 --- a/src/bids/layout/layout.py +++ b/src/bids/layout/layout.py @@ -446,12 +446,13 @@ def _build_path_from_schema_rules(self, entities, directory_rules): # Traverse schema directory hierarchy while current_level: rule = directory_rules.get(current_level) - if not rule: + if not rule: # pragma: no cover + # Schema always provides valid rules; current_level only set from schema subdirs break - + rule_dict = dict(rule) rule_entity = rule_dict.get('entity') - + # If this rule corresponds to an entity, add directory component if rule_entity and rule_entity in entities: entity_value = entities[rule_entity] @@ -459,7 +460,8 @@ def _build_path_from_schema_rules(self, entities, directory_rules): path_components.append(f'sub-{entity_value}') elif rule_entity == 'session': path_components.append(f'ses-{entity_value}') - else: + else: # pragma: no cover + # BIDS schema only defines subject/session entity directories currently path_components.append(str(entity_value)) elif current_level == 'datatype' and 'datatype' in entities: # Special case: datatype rule has no entity field but we use the datatype value diff --git a/src/bids/layout/tests/test_layout.py b/src/bids/layout/tests/test_layout.py index a9d99085..67df7ccb 100644 --- a/src/bids/layout/tests/test_layout.py +++ b/src/bids/layout/tests/test_layout.py @@ -1214,3 +1214,57 @@ def test_intended_for(temporary_dataset, intent): layout = BIDSLayout(temporary_dataset) assert layout.get_IntendedFor(subject='01') + + +def test_get_return_type_dir_with_schema_datatype(layout_7t_trt): + """Test return_type='dir' with schema config for datatype entity (covers line 463).""" + # Create a schema-based layout + layout = BIDSLayout(layout_7t_trt.root, config='bids-schema', derivatives=False) + + # Query for datatype directories - this should trigger the code path + # that handles non-subject/non-session entities (line 463) + dirs = layout.get(target='datatype', return_type='dir') + + # Should return actual datatype directories + assert len(dirs) > 0 + # Check that these are actual directories in the dataset + for d in dirs: + assert os.path.exists(d) + + +def test_get_return_type_dir_with_legacy_config_no_template(): + """Test return_type='dir' with legacy config missing directory template (covers line 842).""" + # Create a minimal config without directory templates + config_dict = { + 'name': 'test_config', + 'entities': [ + { + 'name': 'subject', + 'pattern': '[/\\\\]sub-([a-zA-Z0-9]+)', + 'directory': '/sub-{subject}/' # This has a template + }, + { + 'name': 'task', + 'pattern': '_task-([a-zA-Z0-9]+)', + # No directory template - this should trigger the error + } + ] + } + + # Create temporary test dataset + import tempfile + with tempfile.TemporaryDirectory() as tmpdir: + # Create minimal BIDS structure + Path(tmpdir).joinpath('dataset_description.json').write_text('{"Name": "test", "BIDSVersion": "1.6.0"}') + sub_dir = Path(tmpdir) / 'sub-01' + sub_dir.mkdir() + test_file = sub_dir / 'sub-01_task-rest_bold.nii.gz' + test_file.touch() + + # Load with our custom config + layout = BIDSLayout(tmpdir, config=config_dict, validate=False) + + # This should raise ValueError when trying to get directories for 'task' + # because task entity has no directory template + with pytest.raises(ValueError, match='Return type set to directory'): + layout.get(target='task', return_type='dir') From e3e6108cec730ee8c02bb96bbab2a2ba12e90c6c Mon Sep 17 00:00:00 2001 From: Ashley Stewart Date: Thu, 30 Oct 2025 12:18:08 +1000 Subject: [PATCH 10/10] Remove unnecessary test execution code --- src/bids/layout/tests/test_schema_config.py | 10 ---------- .../layout/tests/test_schema_pattern_validation.py | 10 ---------- .../layout/tests/test_schema_version_differences.py | 8 -------- src/bids/layout/tests/test_schema_vs_json_config.py | 7 ------- 4 files changed, 35 deletions(-) diff --git a/src/bids/layout/tests/test_schema_config.py b/src/bids/layout/tests/test_schema_config.py index b8165276..42feef50 100644 --- a/src/bids/layout/tests/test_schema_config.py +++ b/src/bids/layout/tests/test_schema_config.py @@ -135,13 +135,3 @@ def test_schema_directory_query(self): # Should include subject directories assert any('sub-' in os.path.basename(d) for d in subject_dirs) - - -if __name__ == '__main__': - # Allow running as script for quick testing - test_instance = TestSchemaConfig() - test_instance.test_load_bids_schema_basic() - test_instance.test_schema_entity_patterns() - test_instance.test_schema_version_tracking() - test_instance.test_schema_version_parameter() - print("Basic tests passed!") \ No newline at end of file diff --git a/src/bids/layout/tests/test_schema_pattern_validation.py b/src/bids/layout/tests/test_schema_pattern_validation.py index ebfef9af..594825ea 100644 --- a/src/bids/layout/tests/test_schema_pattern_validation.py +++ b/src/bids/layout/tests/test_schema_pattern_validation.py @@ -185,13 +185,3 @@ def test_no_hardcoded_values(self): print(f"Schema datatypes used: {len(bids_schema.objects.datatypes)}") print(f"Schema entities used: {len(bids_schema.objects.entities)}") - -if __name__ == '__main__': - # Allow running as script for quick testing - test_instance = TestSchemaAccuracy() - test_instance.test_extension_patterns_match_schema() - test_instance.test_suffix_patterns_match_schema() - test_instance.test_datatype_patterns_match_schema() - test_instance.test_entity_format_patterns_match_schema() - test_instance.test_no_hardcoded_values() - print("All schema accuracy tests passed!") \ No newline at end of file diff --git a/src/bids/layout/tests/test_schema_version_differences.py b/src/bids/layout/tests/test_schema_version_differences.py index 37da8cf7..10b9b239 100644 --- a/src/bids/layout/tests/test_schema_version_differences.py +++ b/src/bids/layout/tests/test_schema_version_differences.py @@ -90,11 +90,3 @@ def test_schema_version_metadata_differences(self): assert config_current.name != config_v190.name assert config_v190.name != config_v180.name - -if __name__ == '__main__': - # Allow running as script for quick testing - test_instance = TestSchemaVersionDifferences() - test_instance.test_schema_version_entity_differences() - test_instance.test_motion_datatype_evolution() - test_instance.test_schema_version_metadata_differences() - print("Schema version difference tests completed!") \ No newline at end of file diff --git a/src/bids/layout/tests/test_schema_vs_json_config.py b/src/bids/layout/tests/test_schema_vs_json_config.py index f47b3d4a..507ea47c 100644 --- a/src/bids/layout/tests/test_schema_vs_json_config.py +++ b/src/bids/layout/tests/test_schema_vs_json_config.py @@ -307,10 +307,3 @@ def _map_entity_name_old_to_new(self, new_entity_name): } return name_mapping.get(new_entity_name, new_entity_name) - -if __name__ == '__main__': - # Allow running as script for quick testing - test_instance = TestConfigCompatibility() - test_instance.test_entity_coverage_matches() - test_instance.test_suffix_coverage_matches() - test_instance.test_filename_parsing_compatibility()