From 0059f506196311094a1e7c1c8b40548837d55d10 Mon Sep 17 00:00:00 2001
From: Ashley Stewart <a.stewart.au@gmail.com>
Date: Tue, 9 Sep 2025 15:08:10 +1000
Subject: [PATCH 01/10] Schema integration first try

---
 pyproject.toml                                |   1 +
 schema_integration_demo.ipynb                 | 437 ++++++++++++++++++
 src/bids/layout/layout.py                     | 142 +++++-
 src/bids/layout/models.py                     | 368 ++++++++++++++-
 .../layout/tests/test_config_compatibility.py | 378 +++++++++++++++
 src/bids/layout/tests/test_schema_accuracy.py | 197 ++++++++
 src/bids/layout/tests/test_schema_config.py   | 116 +++++
 7 files changed, 1615 insertions(+), 24 deletions(-)
 create mode 100644 schema_integration_demo.ipynb
 create mode 100644 src/bids/layout/tests/test_config_compatibility.py
 create mode 100644 src/bids/layout/tests/test_schema_accuracy.py
 create mode 100644 src/bids/layout/tests/test_schema_config.py

diff --git a/pyproject.toml b/pyproject.toml
index 5b0302c2..18aad135 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -30,6 +30,7 @@ dependencies = [
   "formulaic >=0.3",
   "sqlalchemy >=1.4.31",
   "bids-validator>=1.14.7",  # Keep up-to-date to ensure support for recent modalities
+  "bidsschematools>=0.10",  # Required for schema-based configs
   "num2words >=0.5.10",
   "click >=8.0",
   "universal_pathlib >=0.2.2",
diff --git a/schema_integration_demo.ipynb b/schema_integration_demo.ipynb
new file mode 100644
index 00000000..0109eeef
--- /dev/null
+++ b/schema_integration_demo.ipynb
@@ -0,0 +1,437 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# BIDS Schema Integration Demo\n",
+    "\n",
+    "This notebook demonstrates the new BIDS schema integration features in PyBIDS, showing how the new schema-based configuration compares to the legacy JSON-based configuration."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from bids import BIDSLayout\n",
+    "from bids.tests import get_test_data_path\n",
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Setup: Using a test dataset\n",
+    "\n",
+    "We'll use the 7T TRT dataset that comes with PyBIDS for testing."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Using dataset at: /home/ashley/repos/bids/pybids/tests/data/7t_trt\n",
+      "total 76\n",
+      "drwxr-xr-x 12 ashley ashley 4096 Aug 21 08:59 .\n",
+      "drwxr-xr-x  8 ashley ashley 4096 Aug 21 08:59 ..\n",
+      "-rw-r--r--  1 ashley ashley    0 Aug 21 08:59 README\n",
+      "-rw-r--r--  1 ashley ashley   55 Aug 21 08:59 dataset_description.json\n",
+      "-rw-r--r--  1 ashley ashley  482 Aug 21 08:59 participants.tsv\n",
+      "drwxr-xr-x  4 ashley ashley 4096 Aug 21 08:59 sub-01\n",
+      "drwxr-xr-x  4 ashley ashley 4096 Aug 21 08:59 sub-02\n",
+      "drwxr-xr-x  4 ashley ashley 4096 Aug 21 08:59 sub-03\n",
+      "drwxr-xr-x  4 ashley ashley 4096 Aug 21 08:59 sub-04\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Get path to test dataset\n",
+    "data_path = os.path.join(get_test_data_path(), '7t_trt')\n",
+    "print(f\"Using dataset at: {data_path}\")\n",
+    "\n",
+    "# List the first few files to see the structure\n",
+    "!ls -la {data_path} | head -10"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Part 1: Legacy JSON-based Configuration\n",
+    "\n",
+    "The traditional PyBIDS approach uses JSON configuration files to define entities and their patterns."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Legacy layout initialized with 339 files\n",
+      "\n",
+      "Available entities: ['CogAtlasID', 'Columns', 'EchoTime', 'EchoTime1', 'EchoTime2', 'EffectiveEchoSpacing', 'IntendedFor', 'MTState', 'PhaseEncodingDirection', 'RepetitionTime', 'SamplingFrequency', 'SliceEncodingDirection', 'SliceTiming', 'StartTime', 'TaskName', 'acquisition', 'ceagent', 'chunk', 'datatype', 'direction', 'echo', 'extension', 'flip', 'fmap', 'inv', 'modality', 'mt', 'nucleus', 'part', 'proc', 'reconstruction', 'recording', 'run', 'sample', 'scans', 'session', 'space', 'staining', 'subject', 'suffix', 'task', 'tracer', 'tracksys', 'volume']\n",
+      "\n",
+      "Layout info:\n",
+      "BIDS Layout: .../bids/pybids/tests/data/7t_trt | Subjects: 10 | Sessions: 20 | Runs: 20\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Initialize layout with legacy config (this is the default)\n",
+    "layout_legacy = BIDSLayout(data_path, config=['bids'])\n",
+    "print(f\"Legacy layout initialized with {len(layout_legacy.get())} files\")\n",
+    "print(f\"\\nAvailable entities: {sorted(layout_legacy.get_entities().keys())}\")\n",
+    "\n",
+    "# Print basic layout information like in the docs tutorial\n",
+    "print(f\"\\nLayout info:\")\n",
+    "print(layout_legacy)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Found 6 BOLD files for subject 01:\n",
+      "  sub-01_ses-1_task-rest_acq-fullbrain_run-1_bold.nii.gz\n",
+      "  sub-01_ses-1_task-rest_acq-fullbrain_run-2_bold.nii.gz\n",
+      "  sub-01_ses-1_task-rest_acq-prefrontal_bold.nii.gz\n",
+      "  sub-01_ses-2_task-rest_acq-fullbrain_run-1_bold.nii.gz\n",
+      "  sub-01_ses-2_task-rest_acq-fullbrain_run-2_bold.nii.gz\n",
+      "  sub-01_ses-2_task-rest_acq-prefrontal_bold.nii.gz\n",
+      "\n",
+      "Compare return types:\n",
+      "return_types=[<class 'bids.layout.models.BIDSImageFile'>, <class 'bids.layout.models.BIDSImageFile'>]\n",
+      "return_types=[<class 'str'>, <class 'str'>]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Query example: Get all BOLD files for subject 01\n",
+    "bold_files_legacy = layout_legacy.get(\n",
+    "    subject='01', \n",
+    "    suffix='bold', \n",
+    "    extension='nii.gz',\n",
+    "    return_type='filename'  # Returns filenames instead of BIDSFile objects\n",
+    ")\n",
+    "print(f\"Found {len(bold_files_legacy)} BOLD files for subject 01:\")\n",
+    "for f in bold_files_legacy:\n",
+    "    print(f\"  {os.path.basename(f)}\")\n",
+    "\n",
+    "# Show the difference between return types like in docs\n",
+    "print(f\"\\nCompare return types:\")\n",
+    "bold_objects = layout_legacy.get(subject='01', suffix='bold', extension='nii.gz')[:2]\n",
+    "bold_filenames = layout_legacy.get(subject='01', suffix='bold', extension='nii.gz', return_type='filename')[:2]\n",
+    "\n",
+    "print(f\"return_types={[type(obj) for obj in bold_objects]}\")\n",
+    "print(f\"return_types={[type(fname) for fname in bold_filenames]}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Part 2: New Schema-based Configuration\n",
+    "\n",
+    "The new approach uses the BIDS schema directly via `bidsschematools` to understand the dataset structure."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Schema-based layout initialized with 339 files\n",
+      "\n",
+      "Available entities: ['CogAtlasID', 'Columns', 'EchoTime', 'EchoTime1', 'EchoTime2', 'EffectiveEchoSpacing', 'IntendedFor', 'MTState', 'PhaseEncodingDirection', 'RepetitionTime', 'SamplingFrequency', 'SliceEncodingDirection', 'SliceTiming', 'StartTime', 'TaskName', 'acquisition', 'ceagent', 'chunk', 'datatype', 'density', 'description', 'direction', 'echo', 'extension', 'flip', 'hemisphere', 'inversion', 'label', 'modality', 'mtransfer', 'nucleus', 'part', 'processing', 'reconstruction', 'recording', 'resolution', 'run', 'sample', 'segmentation', 'session', 'space', 'split', 'stain', 'subject', 'suffix', 'task', 'tracer', 'tracksys', 'volume']\n",
+      "\n",
+      "Layout info:\n",
+      "BIDS Layout: .../bids/pybids/tests/data/7t_trt | Subjects: 10 | Sessions: 20 | Runs: 20\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Initialize layout with schema-based config\n",
+    "# This uses Config.load('bids-schema') internally when you pass 'bids-schema' in config\n",
+    "layout_schema = BIDSLayout(data_path, config=['bids-schema'])\n",
+    "print(f\"Schema-based layout initialized with {len(layout_schema.get())} files\")\n",
+    "print(f\"\\nAvailable entities: {sorted(layout_schema.get_entities().keys())}\")\n",
+    "\n",
+    "# Print basic layout information\n",
+    "print(f\"\\nLayout info:\")\n",
+    "print(layout_schema)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Found 6 BOLD files for subject 01:\n",
+      "  sub-01_ses-1_task-rest_acq-fullbrain_run-1_bold.nii.gz\n",
+      "  sub-01_ses-1_task-rest_acq-fullbrain_run-2_bold.nii.gz\n",
+      "  sub-01_ses-1_task-rest_acq-prefrontal_bold.nii.gz\n",
+      "  sub-01_ses-2_task-rest_acq-fullbrain_run-1_bold.nii.gz\n",
+      "  sub-01_ses-2_task-rest_acq-fullbrain_run-2_bold.nii.gz\n",
+      "  sub-01_ses-2_task-rest_acq-prefrontal_bold.nii.gz\n",
+      "\n",
+      "Compare return types:\n",
+      "return_types=[<class 'bids.layout.models.BIDSImageFile'>, <class 'bids.layout.models.BIDSImageFile'>]\n",
+      "return_types=[<class 'str'>, <class 'str'>]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Same query with schema-based layout\n",
+    "bold_files_schema = layout_schema.get(\n",
+    "    subject='01', \n",
+    "    suffix='bold', \n",
+    "    extension='nii.gz',\n",
+    "    return_type='filename'\n",
+    ")\n",
+    "print(f\"Found {len(bold_files_schema)} BOLD files for subject 01:\")\n",
+    "for f in bold_files_schema:\n",
+    "    print(f\"  {os.path.basename(f)}\")\n",
+    "\n",
+    "# Show the difference between return types like in docs\n",
+    "print(f\"\\nCompare return types:\")\n",
+    "bold_objects_schema = layout_schema.get(subject='01', suffix='bold', extension='nii.gz')[:2]\n",
+    "bold_filenames_schema = layout_schema.get(subject='01', suffix='bold', extension='nii.gz', return_type='filename')[:2]\n",
+    "\n",
+    "print(f\"return_types={[type(obj) for obj in bold_objects_schema]}\")\n",
+    "print(f\"return_types={[type(fname) for fname in bold_filenames_schema]}\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Part 3: Comparing the Two Approaches\n",
+    "\n",
+    "Let's verify that both approaches give us the same results."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Legacy layout: 339 files\n",
+      "Schema layout: 339 files\n",
+      "\n",
+      "Match: True\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Compare file counts\n",
+    "legacy_count = len(layout_legacy.get())\n",
+    "schema_count = len(layout_schema.get())\n",
+    "\n",
+    "print(f\"Legacy layout: {legacy_count} files\")\n",
+    "print(f\"Schema layout: {schema_count} files\")\n",
+    "print(f\"\\nMatch: {legacy_count == schema_count}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Legacy entities: 44\n",
+      "Schema entities: 49\n",
+      "\n",
+      "Entities only in legacy: ['fmap', 'inv', 'mt', 'proc', 'scans', 'staining']\n",
+      "\n",
+      "Entities only in schema: ['density', 'description', 'hemisphere', 'inversion', 'label', 'mtransfer', 'processing', 'resolution', 'segmentation', 'split', 'stain']\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Compare entities\n",
+    "legacy_entities = set(layout_legacy.get_entities().keys())\n",
+    "schema_entities = set(layout_schema.get_entities().keys())\n",
+    "\n",
+    "print(f\"Legacy entities: {len(legacy_entities)}\")\n",
+    "print(f\"Schema entities: {len(schema_entities)}\")\n",
+    "\n",
+    "# Check for differences\n",
+    "only_legacy = legacy_entities - schema_entities\n",
+    "only_schema = schema_entities - legacy_entities\n",
+    "\n",
+    "if only_legacy:\n",
+    "    print(f\"\\nEntities only in legacy: {sorted(only_legacy)}\")\n",
+    "if only_schema:\n",
+    "    print(f\"\\nEntities only in schema: {sorted(only_schema)}\")\n",
+    "if not only_legacy and not only_schema:\n",
+    "    print(\"\\nAll entities match!\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Query: {'suffix': 'bold', 'extension': 'nii.gz'}\n",
+      "Legacy: 60 files\n",
+      "Schema: 60 files\n",
+      "✓ Results match!\n",
+      "\n",
+      "Query: {'suffix': 'T1w', 'extension': 'nii.gz'}\n",
+      "Legacy: 10 files\n",
+      "Schema: 10 files\n",
+      "✓ Results match!\n",
+      "\n",
+      "Query: {'subject': '01', 'suffix': 'scans', 'extension': 'tsv'}\n",
+      "Legacy: 2 files\n",
+      "Schema: 2 files\n",
+      "✓ Results match!\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Compare specific query results\n",
+    "def compare_queries(layout1, layout2, name1=\"Layout 1\", name2=\"Layout 2\", **kwargs):\n",
+    "    \"\"\"Compare results from two layouts for the same query.\"\"\"\n",
+    "    result1 = set(layout1.get(return_type='filename', **kwargs))\n",
+    "    result2 = set(layout2.get(return_type='filename', **kwargs))\n",
+    "    \n",
+    "    print(f\"Query: {kwargs}\")\n",
+    "    print(f\"{name1}: {len(result1)} files\")\n",
+    "    print(f\"{name2}: {len(result2)} files\")\n",
+    "    \n",
+    "    if result1 == result2:\n",
+    "        print(\"✓ Results match!\\n\")\n",
+    "    else:\n",
+    "        only1 = result1 - result2\n",
+    "        only2 = result2 - result1\n",
+    "        if only1:\n",
+    "            print(f\"Only in {name1}: {[os.path.basename(f) for f in only1]}\")\n",
+    "        if only2:\n",
+    "            print(f\"Only in {name2}: {[os.path.basename(f) for f in only2]}\")\n",
+    "        print()\n",
+    "\n",
+    "# Test various queries\n",
+    "compare_queries(layout_legacy, layout_schema, \"Legacy\", \"Schema\", \n",
+    "                suffix='bold', extension='nii.gz')\n",
+    "\n",
+    "compare_queries(layout_legacy, layout_schema, \"Legacy\", \"Schema\",\n",
+    "                suffix='T1w', extension='nii.gz')\n",
+    "\n",
+    "# 1. Test with scans TSV files (these exist)\n",
+    "compare_queries(layout_legacy, layout_schema, \"Legacy\", \"Schema\",\n",
+    "subject='01', suffix='scans', extension='tsv')\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Part 5: Performance Comparison\n",
+    "\n",
+    "Let's compare the initialization time for both approaches."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Legacy initialization: 0.684 seconds\n",
+      "Schema initialization: 0.679 seconds\n",
+      "\n",
+      "Difference: 0.005 seconds\n",
+      "Schema is faster by 0.8%\n"
+     ]
+    }
+   ],
+   "source": [
+    "import time\n",
+    "\n",
+    "# Time legacy initialization\n",
+    "start = time.time()\n",
+    "layout_legacy_timed = BIDSLayout(data_path, config=['bids'], reset_database=True)\n",
+    "legacy_time = time.time() - start\n",
+    "\n",
+    "# Time schema initialization  \n",
+    "start = time.time()\n",
+    "layout_schema_timed = BIDSLayout(data_path, config=['bids-schema'], reset_database=True)\n",
+    "schema_time = time.time() - start\n",
+    "\n",
+    "print(f\"Legacy initialization: {legacy_time:.3f} seconds\")\n",
+    "print(f\"Schema initialization: {schema_time:.3f} seconds\")\n",
+    "print(f\"\\nDifference: {abs(legacy_time - schema_time):.3f} seconds\")\n",
+    "print(f\"Schema is {'faster' if schema_time < legacy_time else 'slower'} by {abs(1 - schema_time/legacy_time)*100:.1f}%\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.18"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/src/bids/layout/layout.py b/src/bids/layout/layout.py
index 998d26a3..afa2c399 100644
--- a/src/bids/layout/layout.py
+++ b/src/bids/layout/layout.py
@@ -414,6 +414,93 @@ def get_entities(self, scope='all', metadata=None):
             entities.update({e.name: e for e in results})
         return entities
 
+    def _find_directories_for_entity_using_schema(self, entity_name, results):
+        """Find directories using schema directory rules."""
+        from bidsschematools import schema as bst_schema
+        import os
+        
+        # Load schema directory rules
+        bids_schema = bst_schema.load_schema()
+        if not hasattr(bids_schema.rules, 'directories') or not hasattr(bids_schema.rules.directories, 'raw'):
+            # Fallback to simple approach if no schema rules
+            return [f.dirname for f in results if entity_name in f.entities]
+        
+        directory_rules = bids_schema.rules.directories.raw
+        directories = set()
+        
+        # Build directory paths using schema rules for each file
+        for f in results:
+            if entity_name in f.entities:
+                # Build path based on schema directory hierarchy
+                path_components = self._build_path_from_schema_rules(f.entities, directory_rules)
+                if path_components:
+                    # Convert to full directory path
+                    directory_path = os.path.join(self.root, *path_components)
+                    directories.add(directory_path)
+                else:
+                    # Fallback to actual path if schema reconstruction fails
+                    directories.add(f.dirname)
+        
+        return list(directories)
+    
+    def _build_path_from_schema_rules(self, entities, directory_rules):
+        """Build directory path components from schema rules and entity values."""
+        path_components = []
+        current_level = 'root'
+        
+        # Traverse schema directory hierarchy
+        while current_level:
+            rule = directory_rules.get(current_level)
+            if not rule:
+                break
+                
+            rule_dict = dict(rule)
+            rule_entity = rule_dict.get('entity')
+            
+            # If this rule corresponds to an entity, add directory component
+            if rule_entity and rule_entity in entities:
+                entity_value = entities[rule_entity]
+                if rule_entity == 'subject':
+                    path_components.append(f'sub-{entity_value}')
+                elif rule_entity == 'session':
+                    path_components.append(f'ses-{entity_value}')
+                else:
+                    path_components.append(str(entity_value))
+            elif current_level == 'datatype' and 'datatype' in entities:
+                # Special case: datatype rule has no entity field but we use the datatype value
+                path_components.append(entities['datatype'])
+            
+            # Determine next level based on subdirs and available entities
+            subdirs = rule_dict.get('subdirs', [])
+            next_level = None
+            
+            # First pass: look for entity-based subdirs across all options
+            entity_candidates = []
+            rule_candidates = []
+            
+            for subdir in subdirs:
+                if isinstance(subdir, dict) and 'oneOf' in subdir:
+                    for option in subdir['oneOf']:
+                        if option in entities:
+                            entity_candidates.append(option)
+                        elif option in directory_rules:
+                            rule_candidates.append(option)
+                elif isinstance(subdir, str):
+                    if subdir in entities:
+                        entity_candidates.append(subdir)
+                    elif subdir in directory_rules:
+                        rule_candidates.append(subdir)
+            
+            # Prefer entity candidates, fallback to rule candidates
+            if entity_candidates:
+                next_level = entity_candidates[0]  # Take first entity match
+            elif rule_candidates:
+                next_level = rule_candidates[0]  # Take first rule match
+            
+            current_level = next_level
+        
+        return path_components
+
     def get_files(self, scope='all'):
         """Get BIDSFiles for all layouts in the specified scope.
 
@@ -678,7 +765,7 @@ def get(self, return_type='object', target=None, scope='all',
                                '`layout.get(**filters)`.')
 
         # Ensure leading periods if extensions were passed
-        if 'extension' in filters and 'bids' in self.config:
+        if 'extension' in filters and ('bids' in self.config or any('bids-schema' in c for c in self.config)):
             filters['extension'] = ['.' + x.lstrip('.') if isinstance(x, str) else x
                                     for x in listify(filters['extension'])]
 
@@ -746,28 +833,37 @@ def get(self, return_type='object', target=None, scope='all',
                 ))
                 results = natural_sort(list(set(results)))
             elif return_type == 'dir':
-                template = entities[target].directory
-                if template is None:
-                    raise ValueError('Return type set to directory, but no '
-                                     'directory template is defined for the '
-                                     'target entity (\"%s\").' % target)
-                # Construct regex search pattern from target directory template
-                # On Windows, the regex won't compile if, e.g., there is a folder starting with "U" on the path.
-                # Converting to a POSIX path with forward slashes solves this.
-                template = self._root.as_posix() + template
-                to_rep = re.findall(r'{(.*?)\}', template)
-                for ent in to_rep:
-                    patt = entities[ent].pattern
-                    template = template.replace('{%s}' % ent, patt)
-                # Avoid matching subfolders. We are working with POSIX paths here, so we explicitly use "/"
-                # as path separator.
-                template += r'[^/]*$'
-                matches = [
-                    f.dirname
-                    for f in results
-                    if re.search(template, f._dirname.as_posix())
-                ]
-                results = natural_sort(list(set(matches)))
+                # Check if we're using schema-based config or legacy config
+                using_schema = any('bids-schema' in str(c) for c in listify(self.config))
+                
+                if using_schema:
+                    # Use schema directory rules for schema-based configs
+                    directories = self._find_directories_for_entity_using_schema(target, results)
+                    results = natural_sort(list(set(directories)))
+                else:
+                    # Use original template-based approach for legacy configs
+                    template = entities[target].directory
+                    if template is None:
+                        raise ValueError('Return type set to directory, but no '
+                                         'directory template is defined for the '
+                                         'target entity (\"%s\").' % target)
+                    # Construct regex search pattern from target directory template
+                    # On Windows, the regex won't compile if, e.g., there is a folder starting with "U" on the path.
+                    # Converting to a POSIX path with forward slashes solves this.
+                    template = self._root.as_posix() + template
+                    to_rep = re.findall(r'{(.*?)\}', template)
+                    for ent in to_rep:
+                        patt = entities[ent].pattern
+                        template = template.replace('{%s}' % ent, patt)
+                    # Avoid matching subfolders. We are working with POSIX paths here, so we explicitly use "/"
+                    # as path separator.
+                    template += r'[^/]*$'
+                    matches = [
+                        f.dirname
+                        for f in results
+                        if re.search(template, f._dirname.as_posix())
+                    ]
+                    results = natural_sort(list(set(matches)))
         else:
             results = natural_sort(results, 'path')
 
diff --git a/src/bids/layout/models.py b/src/bids/layout/models.py
index 104e1f42..48f5a285 100644
--- a/src/bids/layout/models.py
+++ b/src/bids/layout/models.py
@@ -16,6 +16,10 @@
 from sqlalchemy import Column, String, Boolean, ForeignKey, Table
 from sqlalchemy.orm import reconstructor, relationship, backref, object_session
 
+from bidsschematools import schema as bst_schema
+from bidsschematools import rules
+import re
+
 try:
     from sqlalchemy.orm import declarative_base
 except ImportError:  # sqlalchemy < 1.4
@@ -148,7 +152,7 @@ def _init_on_load(self):
         self.default_path_patterns = json.loads(self._default_path_patterns)
 
     @classmethod
-    def load(self, config, session=None):
+    def load(cls, config, session=None):
         """Load a Config instance from the passed configuration data.
 
         Parameters
@@ -160,6 +164,8 @@ def load(self, config, session=None):
                 (e.g., 'bids' or 'derivatives')
             * A path to a JSON file containing config information
             * A dictionary containing config information
+            * 'bids-schema' to load from the official BIDS schema
+            * dict with 'schema_version' key to load specific schema version
         session : :obj:`sqlalchemy.orm.session.Session` or None
             An optional SQLAlchemy Session instance.
             If passed, the session is used to check the database for (and
@@ -170,6 +176,16 @@ def load(self, config, session=None):
         A Config instance.
         """
 
+        # Handle schema-based config loading
+        if config == 'bids-schema':
+            return cls._from_schema(session=session)
+        elif isinstance(config, dict) and 'schema_version' in config:
+            return cls._from_schema(
+                schema_version=config['schema_version'],
+                session=session
+            )
+
+        # Existing JSON/file-based loading
         if isinstance(config, (str, Path, UPath)):
             config_paths = get_option('config_paths')
             if config in config_paths:
@@ -188,6 +204,356 @@ def load(self, config, session=None):
 
         return Config(session=session, **config)
 
+    @classmethod
+    def _from_schema(cls, schema_version=None, session=None):
+        """Load config from BIDS schema - store patterns directly."""
+        from bidsschematools import rules, schema as bidsschema
+        
+        bids_schema = bidsschema.load_schema()
+        
+        # Collect ALL patterns from bidsschematools (keep existing collection logic)
+        all_patterns = []
+        file_sections = {
+            'raw': bids_schema.rules.files.raw,
+            'deriv': bids_schema.rules.files.deriv,  
+            'common': bids_schema.rules.files.common
+        }
+        
+        for section_type, sections in file_sections.items():
+            if section_type == 'raw':
+                for datatype_name in sections.keys():
+                    datatype_rules = getattr(sections, datatype_name)
+                    regex_rules = rules.regexify_filename_rules(datatype_rules, bids_schema, level=1)
+                    all_patterns.extend(regex_rules)
+            else:
+                for subsection_name in sections.keys():
+                    subsection_rules = getattr(sections, subsection_name)
+                    regex_rules = rules.regexify_filename_rules(subsection_rules, bids_schema, level=1)
+                    all_patterns.extend(regex_rules)
+        
+        # Store patterns directly - NO PARSING EVER
+        config_name = f'bids-schema-{bids_schema.bids_version}'
+        
+        # Extract entities directly from schema rules - no regex parsing!
+        entity_names = cls._extract_entity_names_from_rules(bids_schema)
+        
+        # Extract actual values for key entities from schema rules directly
+        entity_values = cls._extract_entity_values_from_rules(bids_schema, entity_names)
+        
+        # Create Entity objects
+        entities = cls._create_entities_from_schema(entity_names, entity_values, bids_schema)
+        
+        config = cls(name=config_name, entities=entities, session=session)
+        
+        return config
+    
+    @classmethod
+    def _extract_entity_names_from_rules(cls, bids_schema):
+        """Extract entity names directly from schema rules - no regex parsing!"""
+        
+        # Get entity names from all rule sections
+        entity_names = set()
+        file_sections = {
+            'raw': bids_schema.rules.files.raw,
+            'deriv': bids_schema.rules.files.deriv,  
+            'common': bids_schema.rules.files.common
+        }
+        
+        for section_type, sections in file_sections.items():
+            for section_name in sections.keys():
+                section_rules = getattr(sections, section_name)
+                for rule_name in section_rules.keys():
+                    rule = getattr(section_rules, rule_name)
+                    rule_dict = dict(rule)
+                    if 'entities' in rule_dict:
+                        entity_names.update(rule_dict['entities'].keys())
+        
+        # Add special constructs that appear in patterns but aren't "entities"
+        entity_names.update(['suffix', 'extension', 'datatype'])
+        
+        return entity_names
+    
+    @classmethod
+    def _create_entities_from_schema(cls, entity_names, entity_values, bids_schema):
+        """Create Entity objects from schema information."""
+        entities = []
+        
+        for entity_name in sorted(entity_names):
+            # Get entity info from schema if available
+            entity_spec = bids_schema.objects.entities.get(entity_name, {})
+            
+            # Handle special entities that don't have BIDS prefixes
+            special_entities = {'suffix', 'extension', 'datatype'}
+            
+            if entity_name in special_entities:
+                # These entities don't have prefixes - they appear directly
+                if entity_name in entity_values and entity_values[entity_name]:
+                    # Filter out problematic values for extension entity
+                    if entity_name == 'extension':
+                        # Remove empty string and wildcard patterns that would match everything
+                        filtered_values = {v for v in entity_values[entity_name] 
+                                         if v and v not in ('.*', '**', '*')}
+                        values = '|'.join(sorted(filtered_values))
+                    else:
+                        values = '|'.join(sorted(entity_values[entity_name]))
+                    
+                    if entity_name == 'extension':
+                        pattern = fr'({values})\Z'  # Extensions are at the end
+                    elif entity_name == 'datatype':
+                        pattern = fr'/({values})/'  # Datatypes are directory names  
+                    else:  # suffix
+                        pattern = fr'[_/\\\\]({values})\.?'  # Before extension, with optional dot
+            else:
+                # Regular entities - use schema format patterns directly!
+                bids_prefix = entity_spec.get('name', entity_name)
+                format_type = entity_spec.get('format', 'label')
+                
+                # Get the official value pattern from schema formats
+                if format_type in bids_schema.objects.formats:
+                    format_obj = dict(bids_schema.objects.formats[format_type])
+                    value_pattern = format_obj['pattern']  # [0-9a-zA-Z]+, [0-9]+, etc.
+                
+                # Combine prefix + schema format pattern
+                if entity_name in ['subject', 'session']:
+                    pattern = fr'[/\\\\]+{bids_prefix}-({value_pattern})'
+                else:
+                    pattern = fr'[_/\\\\]+{bids_prefix}-({value_pattern})'
+            
+            entity_config = {
+                'name': entity_name,
+                'pattern': pattern,
+                'dtype': 'int' if entity_spec.get('format') == 'index' else 'str'
+            }
+            
+            entities.append(entity_config)
+        
+        return entities
+    
+    @classmethod
+    def _extract_entity_values_from_rules(cls, bids_schema, entity_names):
+        """Extract entity values directly from schema rules - no regex parsing!"""
+        
+        entity_values = {name: set() for name in entity_names}
+        file_sections = {
+            'raw': bids_schema.rules.files.raw,
+            'deriv': bids_schema.rules.files.deriv,  
+            'common': bids_schema.rules.files.common
+        }
+        
+        for section_type, sections in file_sections.items():
+            for section_name in sections.keys():
+                section_rules = getattr(sections, section_name)
+                for rule_name in section_rules.keys():
+                    rule = getattr(section_rules, rule_name)
+                    rule_dict = dict(rule)
+                    
+                    # Get values for special entities directly from rules
+                    if 'suffixes' in rule_dict:
+                        entity_values['suffix'].update(rule_dict['suffixes'])
+                    if 'extensions' in rule_dict:
+                        entity_values['extension'].update(rule_dict['extensions'])
+                    if 'datatypes' in rule_dict:
+                        entity_values['datatype'].update(rule_dict['datatypes'])
+                    
+                    # For regular entities, we don't extract specific values
+                    # They use generic patterns like [0-9a-zA-Z]+ 
+        
+        return entity_values
+    
+    
+    
+    @staticmethod 
+    def _extract_separator_from_context(entity_name, before_context, after_context, full_pattern, schema):
+        """Extract separator pattern from schema context.
+        
+        Analyzes the actual schema pattern context to determine what
+        separators are used for this entity by examining the schema regex
+        and entity classification.
+        """
+        # Find the entity in the full pattern and extract its actual context
+        entity_match = re.search(rf'(\S*?)(\(\?P<{entity_name}>[^)]+\))', full_pattern)
+        if not entity_match:
+            return f"[_/\\\\]+{entity_name}-"  # Fallback
+        
+        prefix_context = entity_match.group(1)
+        
+        # Determine entity type from schema - is it a real entity or special construct?
+        is_real_entity = entity_name in schema.objects.entities
+        
+        if not is_real_entity:
+            # This is a schema construct (extension/suffix/datatype capturing group)
+            # Analyze the pattern context to determine positioning rules
+            
+            # Analyze from the actual pattern context
+            if '/' in before_context and '/' in after_context:
+                # Appears between directory separators (like datatype)
+                return "[/\\\\]+"
+            elif before_context.endswith(')'):
+                # Comes directly after another capturing group (like extension after suffix)
+                return ""
+            elif '_)?' in before_context or 'chunk)_)?' in before_context:
+                # Comes after underscore separators from entities (like suffix)
+                return "[_/\\\\]+"
+            else:
+                # Analyze position in full pattern to determine context
+                entity_pos = full_pattern.find(f'(?P<{entity_name}>')
+                pattern_before = full_pattern[:entity_pos]
+                
+                # Count directory structure elements before this construct
+                slash_count = pattern_before.count('/')
+                
+                if slash_count <= 2:
+                    # In directory part - use slash separators
+                    return "[/\\\\]+"
+                else:
+                    # In filename part - analyze further
+                    if ')(?P<' in pattern_before[-20:]:
+                        # Directly after another construct
+                        return ""
+                    else:
+                        # After separator
+                        return "[_/\\\\]+"
+        
+        # Extract the actual prefix used in the schema pattern  
+        # Look for patterns like 'sub-(?P<subject>' or '_task-(?P<task>'
+        prefix_pattern = re.search(rf'([/_]?)([a-zA-Z]+)-\(\?P<{entity_name}>', full_pattern)
+        if prefix_pattern:
+            separator_char = prefix_pattern.group(1)
+            prefix = prefix_pattern.group(2)
+            
+            if separator_char == '/':
+                return f"[/\\\\]+{prefix}-"
+            elif separator_char == '_':
+                return f"[_/\\\\]+{prefix}-" 
+            else:  # Direct prefix (no separator before)
+                # Check position to determine separator type
+                entity_pos = full_pattern.find(f'(?P<{entity_name}>')
+                pattern_before = full_pattern[:entity_pos]
+                slash_count = pattern_before.count('/')
+                
+                if slash_count <= 2:  # Directory part
+                    return f"[/\\\\]+{prefix}-"
+                else:  # Filename part  
+                    return f"[_/\\\\]+{prefix}-"
+        
+        # Try to extract any entity prefix from before_context
+        prefix_match = re.search(r'([a-zA-Z]+)-$', before_context)
+        if prefix_match:
+            prefix = prefix_match.group(1)
+            # Verify this is a real entity prefix from schema
+            for entity in schema.objects.entities.values():
+                if entity.get('name') == prefix:
+                    if '/' in before_context:
+                        return f"[/\\\\]+{prefix}-"
+                    else:
+                        return f"[_/\\\\]+{prefix}-"
+        
+        # For entities with prefixes, extract from schema pattern
+        broad_match = re.search(rf'([a-zA-Z]+)-\(\?P<{entity_name}>', full_pattern)
+        if broad_match:
+            prefix = broad_match.group(1)
+            
+            # Determine separator type from position in pattern
+            entity_pos = full_pattern.find(f'(?P<{entity_name}>')
+            pattern_before_entity = full_pattern[:entity_pos]
+            slash_count = pattern_before_entity.count('/')
+            
+            if slash_count <= 2:  # Directory part
+                return f"[/\\\\]+{prefix}-"
+            else:  # Filename part
+                return f"[_/\\\\]+{prefix}-"
+        
+        # If no prefix pattern found, this entity shouldn't have a prefix
+        # This handles cases where entities appear directly without prefix-
+        return f"[_/\\\\]+{entity_name}-"  # Final fallback
+    
+    @staticmethod
+    def _extract_directory_config(entity_name, pattern, schema):
+        """Extract directory configuration from schema directory rules."""
+        
+        # Get directory rules from schema
+        if not hasattr(schema.rules, 'directories') or not hasattr(schema.rules.directories, 'raw'):
+            return None
+            
+        dir_rules = schema.rules.directories.raw
+        
+        # Check if this entity has a directory rule
+        for rule_name, rule in dir_rules.items():
+            rule_dict = dict(rule)
+            if rule_dict.get('entity') == entity_name:
+                # This entity creates directories
+                
+                # Build directory template based on schema hierarchy
+                # Find parent entities by checking what directories can contain this entity
+                parent_entities = []
+                
+                # Check all directory rules to see which ones list this entity in subdirs
+                for parent_rule_name, parent_rule in dir_rules.items():
+                    parent_rule_dict = dict(parent_rule)
+                    subdirs = parent_rule_dict.get('subdirs', [])
+                    
+                    # Check if this entity is in parent's subdirs
+                    for subdir in subdirs:
+                        if isinstance(subdir, dict) and 'oneOf' in subdir:
+                            if entity_name in subdir['oneOf']:
+                                if parent_rule_dict.get('entity'):
+                                    parent_entities.append(parent_rule_dict['entity'])
+                        elif subdir == entity_name:
+                            if parent_rule_dict.get('entity'):
+                                parent_entities.append(parent_rule_dict['entity'])
+                
+                # Build template with parent hierarchy
+                template_parts = []
+                template_parts.extend([f'{{{parent}}}' for parent in parent_entities])
+                template_parts.append(f'{{{entity_name}}}')
+                
+                return ''.join(template_parts)
+        
+        return None
+
+    @staticmethod
+    def _entity_appears_in_all_modalities(entity_name, schema):
+        """Check if an entity appears in ALL modalities in the schema.
+        
+        Entities that appear in all modalities (like suffix, extension) should have
+        their values combined across all modalities to ensure complete coverage.
+        
+        Parameters
+        ----------
+        entity_name : str
+            The entity name to check
+        schema : object
+            The BIDS schema object
+            
+        Returns
+        -------
+        bool
+            True if entity appears in all modalities, False otherwise
+        """
+        # Get all modalities
+        all_modalities = list(schema.rules.files.raw.keys())
+        if not all_modalities:
+            return False
+            
+        modalities_with_entity = set()
+        
+        # Check each modality for this entity
+        for modality in all_modalities:
+            datatype_rules = getattr(schema.rules.files.raw, modality)
+            patterns = rules.regexify_filename_rules(datatype_rules, schema, level=1)
+            
+            # Check if entity appears in any pattern for this modality
+            for pattern in patterns:
+                entities_in_pattern = re.findall(r'\(\?P<([^>]+)>', pattern['regex'])
+                if entity_name in entities_in_pattern:
+                    modalities_with_entity.add(modality)
+                    break  # Found in this modality, move to next
+        
+        # Return True if entity appears in ALL modalities
+        return len(modalities_with_entity) == len(all_modalities)
+
+
+
     def __repr__(self):
         return f"<Config {self.name}>"
 
diff --git a/src/bids/layout/tests/test_config_compatibility.py b/src/bids/layout/tests/test_config_compatibility.py
new file mode 100644
index 00000000..b9904f04
--- /dev/null
+++ b/src/bids/layout/tests/test_config_compatibility.py
@@ -0,0 +1,378 @@
+"""Tests to verify schema-driven config matches old JSON config behavior."""
+
+import pytest
+import re
+from bids.layout.models import Config
+
+
+class TestConfigCompatibility:
+    """Test that new schema-driven config matches old JSON config behavior."""
+    
+    def test_core_entities_present_in_both_configs(self):
+        """Test that essential BIDS entities are present in both configs."""
+        old_config = Config.load('bids')
+        new_config = Config.load('bids-schema')
+        
+        # Test that both configs support the fundamental BIDS entities
+        core_entities = ['subject', 'session', 'task', 'run', 'acquisition']
+        
+        # Check core entities exist (allowing for naming differences)
+        old_has_core = all(entity in old_config.entities for entity in core_entities)
+        new_has_core = all(entity in new_config.entities for entity in core_entities)
+        
+        assert old_has_core and new_has_core, "Both configs should support core BIDS entities"
+    
+    def test_schema_entities_are_superset_of_valid_old_entities(self):
+        """Test that schema includes all valid entities from old config."""
+        old_config = Config.load('bids')
+        new_config = Config.load('bids-schema')
+        
+        # Map old config entities to their schema equivalents
+        entity_mapping = {
+            'inv': 'inversion',
+            'mt': 'mtransfer', 
+            'proc': 'processing',
+            # 'fmap', 'scans', 'staining' are invalid - exclude them
+        }
+        
+        valid_old_entities = set(old_config.entities.keys()) - {'fmap', 'scans', 'staining'}
+        missing_from_new = []
+        
+        for old_entity in valid_old_entities:
+            expected_new_entity = entity_mapping.get(old_entity, old_entity)
+            if expected_new_entity not in new_config.entities:
+                missing_from_new.append(f"{old_entity} -> {expected_new_entity}")
+        
+        assert len(missing_from_new) == 0, f"Schema missing valid entities: {missing_from_new}"
+    
+    def test_suffix_coverage_matches(self):
+        """Test that suffix patterns cover the same suffixes."""
+        #pytest.skip("Old config uses generic catch-all pattern while new config uses explicit schema suffixes. "
+        #            "Cannot meaningfully compare pattern extraction. See test_new_suffixes_compatible_with_old_pattern instead.")
+        
+        # Original implementation preserved for reference
+        old_config = Config.load('bids')
+        new_config = Config.load('bids-schema')
+        
+        # Extract suffixes from patterns
+        old_suffixes = self._extract_suffixes_from_pattern(old_config.entities['suffix'].pattern)
+        new_suffixes = self._extract_suffixes_from_pattern(new_config.entities['suffix'].pattern)
+        
+        missing = old_suffixes - new_suffixes
+        extra = new_suffixes - old_suffixes
+        
+        error_msg = []
+        if missing:
+            error_msg.append(f"Missing suffixes: {sorted(missing)}")
+        if extra:
+            error_msg.append(f"Extra suffixes: {sorted(extra)}")
+        
+        # Core BIDS suffixes should be preserved
+        core_suffixes = {'bold', 'T1w', 'T2w', 'dwi', 'events'}
+        assert core_suffixes.issubset(new_suffixes), f"Missing core suffixes; {'; '.join(error_msg)}"
+        
+        # Report coverage for analysis
+        coverage = len(old_suffixes & new_suffixes) / len(old_suffixes) if old_suffixes else 0
+        if coverage < 1.0 or missing or extra:
+            pytest.fail(f"Suffix coverage: {coverage:.1%}; {'; '.join(error_msg)}")
+    
+    def test_new_suffixes_compatible_with_old_pattern(self):
+        """Test that all new schema suffixes would be accepted by old generic pattern."""
+        old_config = Config.load('bids')
+        new_config = Config.load('bids-schema')
+        
+        # Get old pattern (generic) and new suffixes (explicit)
+        old_pattern = old_config.entities['suffix'].regex
+        new_suffixes = self._extract_suffixes_from_pattern(new_config.entities['suffix'].pattern)
+        
+        # Test that old pattern would accept all new suffixes
+        incompatible_suffixes = []
+        for suffix in new_suffixes:
+            test_filename = f"sub-01_task-test_{suffix}.nii.gz"
+            if not old_pattern.search(test_filename):
+                incompatible_suffixes.append(suffix)
+        
+        assert len(incompatible_suffixes) == 0, f"Old pattern would reject these valid schema suffixes: {incompatible_suffixes}"
+    
+    def test_extension_coverage_matches(self):
+        """Test that extension patterns cover the same extensions."""
+        #pytest.skip("Old config uses generic catch-all pattern while new config uses explicit schema extensions. "
+        #            "Cannot meaningfully compare pattern extraction. See test_new_extensions_compatible_with_old_pattern instead.")
+        
+        # Original implementation preserved for reference
+        old_config = Config.load('bids')
+        new_config = Config.load('bids-schema')
+        
+        # Extract extensions from patterns
+        old_extensions = self._extract_extensions_from_pattern(old_config.entities['extension'].pattern)
+        new_extensions = self._extract_extensions_from_pattern(new_config.entities['extension'].pattern)
+        
+        missing = old_extensions - new_extensions
+        extra = new_extensions - old_extensions
+        
+        error_msg = []
+        if missing:
+            error_msg.append(f"Missing extensions: {sorted(missing)}")
+        if extra:
+            error_msg.append(f"Extra extensions: {sorted(extra)}")
+        
+        # Core extensions should be preserved
+        core_extensions = {'.nii.gz', '.nii', '.tsv', '.json'}
+        assert core_extensions.issubset(new_extensions), f"Missing core extensions; {'; '.join(error_msg)}"
+        
+        # Report coverage for analysis
+        coverage = len(old_extensions & new_extensions) / len(old_extensions) if old_extensions else 0
+        if coverage < 1.0 or missing or extra:
+            pytest.fail(f"Extension coverage: {coverage:.1%}; {'; '.join(error_msg)}")
+    
+    def test_new_extensions_compatible_with_old_pattern(self):
+        """Test that all new schema extensions would be accepted by old generic pattern."""
+        old_config = Config.load('bids')
+        new_config = Config.load('bids-schema')
+        
+        # Get old pattern (generic) and new extensions (explicit)
+        old_pattern = old_config.entities['extension'].regex
+        new_extensions = self._extract_extensions_from_pattern(new_config.entities['extension'].pattern)
+        
+        # Test that old pattern would accept all new extensions
+        incompatible_extensions = []
+        directory_extensions = []
+        
+        for extension in new_extensions:
+            test_filename = f"sub-01_task-test_bold{extension}"
+            if not old_pattern.search(test_filename):
+                if extension.endswith('/'):
+                    # Directory-style extensions are a new BIDS feature not supported by old pattern
+                    directory_extensions.append(extension)
+                else:
+                    incompatible_extensions.append(extension)
+        
+        # Core file extensions should be compatible
+        assert len(incompatible_extensions) == 0, f"Old pattern would reject these valid schema file extensions: {incompatible_extensions}"
+        
+        # Report directory extensions as expected difference
+        if directory_extensions:
+            print(f"Note: {len(directory_extensions)} directory-style extensions are new BIDS features: {directory_extensions}")
+        
+        # Core extensions should be present
+        core_extensions = {'.nii.gz', '.nii', '.tsv', '.json'}
+        assert core_extensions.issubset(new_extensions), f"Missing core extensions from new config"
+    
+    def test_datatype_coverage_matches(self):
+        """Test that datatype patterns cover the same datatypes."""
+        old_config = Config.load('bids')
+        new_config = Config.load('bids-schema')
+        
+        # Extract datatypes from patterns
+        old_datatypes = self._extract_datatypes_from_pattern(old_config.entities['datatype'].pattern)
+        new_datatypes = self._extract_datatypes_from_pattern(new_config.entities['datatype'].pattern)
+        
+        missing = old_datatypes - new_datatypes
+        extra = new_datatypes - old_datatypes
+        
+        error_msg = []
+        if missing:
+            error_msg.append(f"Missing datatypes: {sorted(missing)}")
+        if extra:
+            error_msg.append(f"Extra datatypes: {sorted(extra)}")
+        
+        # Core datatypes should be preserved
+        core_datatypes = {'anat', 'func', 'dwi', 'fmap'}
+        assert core_datatypes.issubset(new_datatypes), f"Missing core datatypes; {'; '.join(error_msg)}"
+        
+        # New should have at least as many as old (schema grows)
+        assert len(new_datatypes) >= len(old_datatypes), f"Fewer datatypes than old config; {'; '.join(error_msg)}"
+    
+    def test_functional_filename_parsing_compatibility(self):
+        """Test that both configs can parse BIDS filenames and extract core entities."""
+        old_config = Config.load('bids')
+        new_config = Config.load('bids-schema')
+        
+        # Test core entity extraction (skip extension due to pattern differences)
+        test_cases = [
+            {
+                'filename': '/dataset/sub-01/func/sub-01_task-rest_bold.nii.gz',
+                'core_entities': {'subject': '01', 'task': 'rest', 'suffix': 'bold'}
+            },
+            {
+                'filename': '/dataset/sub-02/ses-pre/anat/sub-02_ses-pre_T1w.nii.gz', 
+                'core_entities': {'subject': '02', 'session': 'pre', 'suffix': 'T1w'}
+            }
+        ]
+        
+        for test_case in test_cases:
+            filename = test_case['filename']
+            expected = test_case['core_entities']
+            
+            old_entities = self._parse_filename_entities(filename, old_config)
+            new_entities = self._parse_filename_entities(filename, new_config)
+            
+            # Test that both configs extract the core BIDS entities
+            for key, expected_value in expected.items():
+                old_extracted = old_entities.get(key)
+                new_extracted = new_entities.get(key)
+                
+                # Both should extract the same core information
+                assert old_extracted == expected_value, f"Old config failed to extract {key}={expected_value} from {filename}, got {old_extracted}"
+                assert new_extracted == expected_value, f"New config failed to extract {key}={expected_value} from {filename}, got {new_extracted}"
+        
+        # Test that new config has valid patterns
+        assert len(new_config.entities) > 20, "New config should have many entities"
+        assert 'subject' in new_config.entities, "New config missing subject entity"
+        assert 'suffix' in new_config.entities, "New config missing suffix entity"
+    
+    def test_new_config_parses_schema_compliant_filenames(self):
+        """Test that new config correctly parses filenames with schema entities."""
+        new_config = Config.load('bids-schema')
+        
+        # Test files that use entities only available in schema (not old config)
+        schema_specific_cases = [
+            '/dataset/sub-01/anat/sub-01_hemi-L_T1w.nii.gz',  # hemisphere entity
+            '/dataset/sub-01/anat/sub-01_desc-preproc_T1w.nii.gz',  # description entity  
+            '/dataset/sub-01/anat/sub-01_res-native_T1w.nii.gz',  # resolution entity
+        ]
+        
+        for filename in schema_specific_cases:
+            entities = self._parse_filename_entities(filename, new_config)
+            
+            # Should extract subject and suffix at minimum
+            assert 'subject' in entities, f"Failed to extract subject from {filename}"
+            assert 'suffix' in entities, f"Failed to extract suffix from {filename}"
+            
+            # Should extract the schema-specific entity
+            if 'hemi-' in filename:
+                assert 'hemisphere' in entities, f"Failed to extract hemisphere from {filename}"
+            if 'desc-' in filename:
+                assert 'description' in entities, f"Failed to extract description from {filename}"
+            if 'res-' in filename:
+                assert 'resolution' in entities, f"Failed to extract resolution from {filename}"
+    
+    def test_pattern_structure_similarity(self):
+        """Test that pattern structures are similar between old and new."""
+        old_config = Config.load('bids')
+        new_config = Config.load('bids-schema')
+        
+        # Test key entities have reasonable pattern structures
+        key_entities = ['subject', 'session', 'task', 'run', 'suffix', 'extension']
+        
+        for entity_name in key_entities:
+            if entity_name in old_config.entities and entity_name in new_config.entities:
+                old_pattern = old_config.entities[entity_name].pattern
+                new_pattern = new_config.entities[entity_name].pattern
+                
+                # Both should be non-empty
+                assert len(old_pattern) > 0, f"Old {entity_name} pattern is empty"
+                assert len(new_pattern) > 0, f"New {entity_name} pattern is empty"
+                
+                # Both should contain capturing groups
+                old_groups = len(re.findall(r'\([^)]*\)', old_pattern))
+                new_groups = len(re.findall(r'\([^)]*\)', new_pattern))
+                
+                assert old_groups > 0, f"Old {entity_name} pattern has no groups"
+                assert new_groups > 0, f"New {entity_name} pattern has no groups"
+    
+    def test_config_names_and_metadata(self):
+        """Test config metadata is reasonable."""
+        old_config = Config.load('bids')
+        new_config = Config.load('bids-schema')
+        
+        # Old config should have simple name
+        assert old_config.name == 'bids'
+        
+        # New config should include version info
+        assert new_config.name.startswith('bids-schema-')
+        assert len(new_config.name) > len('bids-schema-')
+        
+        # Both should have reasonable entity counts
+        assert len(old_config.entities) > 10, "Old config has too few entities"
+        assert len(new_config.entities) > 10, "New config has too few entities"
+        
+        # Report entity count ratio for analysis
+        ratio = len(new_config.entities) / len(old_config.entities)
+        print(f"Entity count ratio (new/old): {ratio:.2f}")
+    
+    # Helper methods
+    def _extract_suffixes_from_pattern(self, pattern):
+        """Extract suffixes from a pattern like [_/\\]+(suffix1|suffix2|...)"""
+        suffixes = set()
+        
+        # Look for alternation groups
+        matches = re.findall(r'\(([^)]+)\)', pattern)
+        for match in matches:
+            if '|' in match:
+                suffixes.update(match.split('|'))
+            else:
+                suffixes.add(match)
+        
+        return suffixes
+    
+    def _extract_extensions_from_pattern(self, pattern):
+        """Extract extensions from a pattern."""
+        extensions = set()
+        
+        # Look for patterns with dots
+        matches = re.findall(r'\\?\.\w+(?:\\?\.\w+)*', pattern)
+        for match in matches:
+            # Clean up regex escaping
+            clean_ext = match.replace('\\', '')
+            extensions.add(clean_ext)
+        
+        # Also look for alternation groups
+        alt_matches = re.findall(r'\(([^)]+)\)', pattern)
+        for match in alt_matches:
+            if '|' in match and '.' in match:
+                for item in match.split('|'):
+                    if '.' in item:
+                        clean_item = item.replace('\\', '')
+                        extensions.add(clean_item)
+        
+        return extensions
+    
+    def _extract_datatypes_from_pattern(self, pattern):
+        """Extract datatypes from a pattern like [/\\]+(datatype1|datatype2|...)"""
+        datatypes = set()
+        
+        # Look for alternation groups
+        matches = re.findall(r'\(([^)]+)\)', pattern)
+        for match in matches:
+            if '|' in match:
+                datatypes.update(match.split('|'))
+            else:
+                datatypes.add(match)
+        
+        return datatypes
+    
+    def _parse_filename_entities(self, filename, config):
+        """Parse entities from filename using config patterns."""
+        entities = {}
+        
+        for entity_name, entity in config.entities.items():
+            if hasattr(entity, 'regex') and entity.regex:
+                match = entity.regex.search(filename)
+                if match and len(match.groups()) > 0:
+                    entities[entity_name] = match.group(1)
+        
+        return entities
+    
+    def _map_entity_name_old_to_new(self, new_entity_name):
+        """Map new entity names to old entity names for compatibility testing."""
+        # Old config sometimes used different names
+        name_mapping = {
+            'subject': 'subject',
+            'session': 'session', 
+            'task': 'task',
+            'run': 'run',
+            'acquisition': 'acquisition',
+            'suffix': 'suffix',
+            'extension': 'extension',
+            # Add other mappings as needed
+        }
+        return name_mapping.get(new_entity_name, new_entity_name)
+
+
+if __name__ == '__main__':
+    # Allow running as script for quick testing
+    test_instance = TestConfigCompatibility()
+    test_instance.test_entity_coverage_matches()
+    test_instance.test_suffix_coverage_matches()
+    test_instance.test_filename_parsing_compatibility()
diff --git a/src/bids/layout/tests/test_schema_accuracy.py b/src/bids/layout/tests/test_schema_accuracy.py
new file mode 100644
index 00000000..ebfef9af
--- /dev/null
+++ b/src/bids/layout/tests/test_schema_accuracy.py
@@ -0,0 +1,197 @@
+"""Tests to verify schema patterns match actual schema content."""
+
+from bids.layout.models import Config
+from bidsschematools import schema as bst_schema
+
+
+class TestSchemaAccuracy:
+    """Test that patterns accurately reflect BIDS schema content."""
+    
+    def test_extension_patterns_match_schema(self):
+        """Test that extension patterns match schema extensions."""
+        config = Config.load('bids-schema')
+        
+        # Load schema directly
+        bids_schema = bst_schema.load_schema()
+        
+        # Get extension entity
+        extension_entity = config.entities['extension']
+        
+        # Test that schema extensions are matched (excluding wildcards)
+        schema_extensions = [ext['value'] for ext in bids_schema.objects.extensions.values() 
+                           if ext.get('value') and ext['value'].startswith('.') and ext['value'] not in ('.*',)]
+        
+        # Collect all failures
+        matched_extensions = []
+        unmatched_extensions = []
+        
+        test_files = [f'test{ext}' for ext in schema_extensions]
+        for ext, test_file in zip(schema_extensions, test_files):
+            match = extension_entity.regex.search(test_file)
+            if match is not None:
+                matched_extensions.append(ext)
+            else:
+                unmatched_extensions.append(ext)
+        
+        # Report all differences at once
+        coverage = len(matched_extensions) / len(schema_extensions) * 100 if schema_extensions else 0
+        
+        error_msg = []
+        error_msg.append(f"Extension coverage: {coverage:.1f}% ({len(matched_extensions)}/{len(schema_extensions)})")
+        if unmatched_extensions:
+            error_msg.append(f"Unmatched extensions ({len(unmatched_extensions)}): {unmatched_extensions[:20]}")
+            if len(unmatched_extensions) > 20:
+                error_msg.append(f"... and {len(unmatched_extensions) - 20} more")
+        if matched_extensions:
+            error_msg.append(f"Matched extensions ({len(matched_extensions)}): {matched_extensions[:10]}")
+            if len(matched_extensions) > 10:
+                error_msg.append(f"... and {len(matched_extensions) - 10} more")
+        
+        assert len(unmatched_extensions) == 0, "; ".join(error_msg)
+    
+    def test_suffix_patterns_match_schema(self):
+        """Test that suffix patterns match schema suffixes."""
+        config = Config.load('bids-schema')
+        
+        # Load schema directly
+        bids_schema = bst_schema.load_schema()
+        
+        # Get suffix entity
+        suffix_entity = config.entities['suffix']
+        
+        # Test that schema suffixes are matched using actual suffix values, not keys
+        schema_suffix_values = [obj['value'] for obj in bids_schema.objects.suffixes.values() 
+                               if obj.get('value')]
+        
+        # Collect all failures
+        matched_suffixes = []
+        unmatched_suffixes = []
+        
+        # Test all suffixes using their actual values
+        test_files = [f'_{suffix}.nii.gz' for suffix in schema_suffix_values]
+        for suffix, test_file in zip(schema_suffix_values, test_files):
+            match = suffix_entity.regex.search(test_file)
+            if match is not None:
+                matched_suffixes.append(suffix)
+            else:
+                unmatched_suffixes.append(suffix)
+        
+        # Report all differences at once
+        coverage = len(matched_suffixes) / len(schema_suffix_values) * 100 if schema_suffix_values else 0
+        
+        error_msg = []
+        error_msg.append(f"Suffix coverage: {coverage:.1f}% ({len(matched_suffixes)}/{len(schema_suffix_values)})")
+        if unmatched_suffixes:
+            error_msg.append(f"Unmatched suffixes ({len(unmatched_suffixes)}): {unmatched_suffixes[:20]}")
+            if len(unmatched_suffixes) > 20:
+                error_msg.append(f"... and {len(unmatched_suffixes) - 20} more")
+        if matched_suffixes:
+            error_msg.append(f"Matched suffixes ({len(matched_suffixes)}): {matched_suffixes[:10]}")
+            if len(matched_suffixes) > 10:
+                error_msg.append(f"... and {len(matched_suffixes) - 10} more")
+        
+        assert len(unmatched_suffixes) == 0, "; ".join(error_msg)
+    
+    def test_datatype_patterns_match_schema(self):
+        """Test that datatype patterns match schema datatypes."""
+        config = Config.load('bids-schema')
+        
+        # Load schema directly
+        bids_schema = bst_schema.load_schema()
+        
+        # Get datatype entity
+        datatype_entity = config.entities['datatype']
+        
+        # Test that schema datatypes are matched
+        schema_datatypes = list(bids_schema.objects.datatypes.keys())
+        
+        # Collect all failures
+        matched_datatypes = []
+        unmatched_datatypes = []
+        
+        test_paths = [f'/{dtype}/' for dtype in schema_datatypes]
+        for dtype, test_path in zip(schema_datatypes, test_paths):
+            match = datatype_entity.regex.search(test_path)
+            if match is not None:
+                matched_datatypes.append(dtype)
+            else:
+                unmatched_datatypes.append(dtype)
+        
+        # Report all differences at once
+        coverage = len(matched_datatypes) / len(schema_datatypes) * 100 if schema_datatypes else 0
+        
+        error_msg = []
+        error_msg.append(f"Datatype coverage: {coverage:.1f}% ({len(matched_datatypes)}/{len(schema_datatypes)})")
+        if unmatched_datatypes:
+            error_msg.append(f"Unmatched datatypes ({len(unmatched_datatypes)}): {unmatched_datatypes}")
+        if matched_datatypes:
+            error_msg.append(f"Matched datatypes ({len(matched_datatypes)}): {matched_datatypes}")
+        
+        assert len(unmatched_datatypes) == 0, "; ".join(error_msg)
+    
+    def test_entity_format_patterns_match_schema(self):
+        """Test that entity patterns use actual schema format patterns."""
+        config = Config.load('bids-schema')
+        
+        # Load schema directly
+        bids_schema = bst_schema.load_schema()
+        
+        # Test a few key entities
+        test_entities = [
+            ('subject', 'label'),
+            ('run', 'index'),
+            ('echo', 'index'),
+        ]
+        
+        for entity_name, expected_format in test_entities:
+            if entity_name in config.entities:
+                entity = config.entities[entity_name]
+                
+                # Get expected pattern from schema
+                format_obj = bids_schema.objects.formats[expected_format]
+                expected_pattern = format_obj['pattern']
+                
+                # Check that our entity pattern contains the schema pattern
+                assert expected_pattern in entity.pattern, \
+                    f"Entity {entity_name} pattern doesn't contain schema format pattern"
+    
+    def test_no_hardcoded_values(self):
+        """Test that we don't have hardcoded schema values anymore."""
+        config = Config.load('bids-schema')
+        
+        # Load schema directly to get actual counts
+        bids_schema = bst_schema.load_schema()
+        
+        # We should have more entities than before because we're not missing any
+        assert len(config.entities) >= len(bids_schema.objects.entities)
+        
+        # Extension pattern should include schema extensions
+        extension_entity = config.entities['extension']
+        schema_extension_count = len([ext for ext in bids_schema.objects.extensions.values() 
+                                    if ext.get('value') and ext['value'].startswith('.')])
+        
+        # Pattern should include many extensions (not just a generic catch-all)
+        assert len(extension_entity.pattern) > 100, "Extension pattern seems too simple"
+        
+        # Suffix pattern should include schema suffixes
+        suffix_entity = config.entities['suffix']
+        schema_suffix_count = len(bids_schema.objects.suffixes)
+        
+        # Pattern should include many suffixes
+        assert len(suffix_entity.pattern) > 200, "Suffix pattern seems too simple"
+        
+        print(f"Schema extensions used: {schema_extension_count}")
+        print(f"Schema suffixes used: {schema_suffix_count}")
+        print(f"Schema datatypes used: {len(bids_schema.objects.datatypes)}")
+        print(f"Schema entities used: {len(bids_schema.objects.entities)}")
+
+
+if __name__ == '__main__':
+    # Allow running as script for quick testing
+    test_instance = TestSchemaAccuracy()
+    test_instance.test_extension_patterns_match_schema()
+    test_instance.test_suffix_patterns_match_schema()
+    test_instance.test_datatype_patterns_match_schema()
+    test_instance.test_entity_format_patterns_match_schema()
+    test_instance.test_no_hardcoded_values()
+    print("All schema accuracy tests passed!")
\ No newline at end of file
diff --git a/src/bids/layout/tests/test_schema_config.py b/src/bids/layout/tests/test_schema_config.py
new file mode 100644
index 00000000..088d456d
--- /dev/null
+++ b/src/bids/layout/tests/test_schema_config.py
@@ -0,0 +1,116 @@
+"""Tests for schema-based Config loading."""
+
+import pytest
+
+from bids.layout.models import Config
+
+
+class TestSchemaConfig:
+    """Test schema-based configuration loading."""
+    
+    def test_load_bids_schema_basic(self):
+        """Test loading config from BIDS schema."""
+        config = Config.load('bids-schema')
+        
+        # Basic checks
+        assert config.name.startswith('bids-schema-')
+        assert len(config.entities) > 0
+        
+        # Check that standard entities are present (using full names per PyBIDS convention)
+        entity_names = {e.name for e in config.entities.values()}
+        expected_entities = {'subject', 'session', 'task', 'run'}
+        assert expected_entities.issubset(entity_names)
+        
+        # Check that PyBIDS-specific entities are present
+        pybids_entities = {'extension', 'suffix', 'datatype'}
+        assert pybids_entities.issubset(entity_names)
+    
+    def test_schema_entity_patterns(self):
+        """Test that entity patterns are correctly generated."""
+        config = Config.load('bids-schema')
+        
+        # Test subject entity (directory-based)
+        if 'subject' in config.entities:
+            subject_entity = config.entities['subject']
+            assert subject_entity.regex is not None
+            # Should match /sub-01/ or similar
+            match = subject_entity.regex.search('/sub-01/')
+            assert match is not None
+            assert match.group(1) == '01'
+        
+        # Test task entity (file-based)
+        if 'task' in config.entities:
+            task_entity = config.entities['task']
+            assert task_entity.regex is not None
+            # Should match _task-rest_ or similar
+            match = task_entity.regex.search('_task-rest_')
+            assert match is not None
+            assert match.group(1) == 'rest'
+    
+    def test_schema_version_tracking(self):
+        """Test that schema version is tracked in config name."""
+        config = Config.load('bids-schema')
+        
+        # Config name should include version
+        assert 'bids-schema-' in config.name
+        # Should have some version number after the dash
+        version_part = config.name.split('bids-schema-')[1]
+        assert len(version_part) > 0
+    
+    def test_schema_version_parameter(self):
+        """Test loading specific schema version (future feature)."""
+        # This should work but use the default schema for now
+        config = Config.load({'schema_version': '1.9.0'})
+        
+        assert config.name.startswith('bids-schema-')
+        assert len(config.entities) > 0
+    
+    
+    def test_entity_generation(self):
+        """Test entity pattern generation using real schema."""
+        config = Config.load('bids-schema')
+        
+        # Test that entities were generated
+        assert len(config.entities) > 0
+        
+        # Test specific entity properties we care about (using full names)
+        assert 'subject' in config.entities
+        assert 'task' in config.entities
+        assert 'run' in config.entities
+        
+        # Test that patterns are valid regex
+        for entity_name, entity in config.entities.items():
+            assert entity.regex is not None, f"Entity {entity_name} missing regex"
+            # Test that regex compiles and works
+            assert hasattr(entity.regex, 'search'), f"Entity {entity_name} regex invalid"
+        
+        # Test specific entity patterns work correctly
+        subject_entity = config.entities['subject']
+        assert 'sub-' in subject_entity.pattern
+        
+        # Test subject pattern matches expected format
+        match = subject_entity.regex.search('/sub-01/')
+        assert match is not None
+        assert match.group(1) == '01'
+        
+        # Test task pattern matches expected format  
+        task_entity = config.entities['task']
+        match = task_entity.regex.search('_task-rest_')
+        assert match is not None
+        assert match.group(1) == 'rest'
+        
+        # Test run pattern matches expected format
+        run_entity = config.entities['run']
+        match = run_entity.regex.search('_run-02_')
+        assert match is not None
+        assert match.group(1) == '02'
+    
+
+
+if __name__ == '__main__':
+    # Allow running as script for quick testing
+    test_instance = TestSchemaConfig()
+    test_instance.test_load_bids_schema_basic()
+    test_instance.test_schema_entity_patterns()
+    test_instance.test_schema_version_tracking()
+    print("Basic tests passed!")
\ No newline at end of file

From 8aa069c8711fe5d13e805d1e54a935d43205c597 Mon Sep 17 00:00:00 2001
From: Ashley Stewart <a.stewart.au@gmail.com>
Date: Tue, 9 Sep 2025 16:03:09 +1000
Subject: [PATCH 02/10] Support specifying bids schema version

---
 pyproject.toml                                |   2 +-
 src/bids/layout/layout.py                     |   3 +-
 src/bids/layout/models.py                     |  36 ++++++-
 .../layout/tests/test_config_compatibility.py |  62 -----------
 src/bids/layout/tests/test_schema_config.py   |   8 +-
 .../tests/test_schema_version_differences.py  | 100 ++++++++++++++++++
 6 files changed, 142 insertions(+), 69 deletions(-)
 create mode 100644 src/bids/layout/tests/test_schema_version_differences.py

diff --git a/pyproject.toml b/pyproject.toml
index 18aad135..962c4634 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -30,7 +30,7 @@ dependencies = [
   "formulaic >=0.3",
   "sqlalchemy >=1.4.31",
   "bids-validator>=1.14.7",  # Keep up-to-date to ensure support for recent modalities
-  "bidsschematools>=0.10",  # Required for schema-based configs
+  "bidsschematools>=1.1.0",  # Required for schema-based configs
   "num2words >=0.5.10",
   "click >=8.0",
   "universal_pathlib >=0.2.2",
diff --git a/src/bids/layout/layout.py b/src/bids/layout/layout.py
index afa2c399..73b5b1e8 100644
--- a/src/bids/layout/layout.py
+++ b/src/bids/layout/layout.py
@@ -76,7 +76,8 @@ class BIDSLayout:
         file in order to be indexed.
     config : str or list or None, optional
         Optional name(s) of configuration file(s) to use.
-        By default (None), uses 'bids'.
+        By default (None), uses 'bids'. Can also be 'bids-schema' to use
+        the official BIDS schema instead of JSON config files.
     sources : :obj:`bids.layout.BIDSLayout` or list or None, optional
         Optional BIDSLayout(s) from which the current BIDSLayout is derived.
     config_filename : str
diff --git a/src/bids/layout/models.py b/src/bids/layout/models.py
index 48f5a285..d2d1a89c 100644
--- a/src/bids/layout/models.py
+++ b/src/bids/layout/models.py
@@ -165,7 +165,8 @@ def load(cls, config, session=None):
             * A path to a JSON file containing config information
             * A dictionary containing config information
             * 'bids-schema' to load from the official BIDS schema
-            * dict with 'schema_version' key to load specific schema version
+            * dict with 'schema_version' key to load specific BIDS version
+                (e.g., {'schema_version': '1.9.0'})
         session : :obj:`sqlalchemy.orm.session.Session` or None
             An optional SQLAlchemy Session instance.
             If passed, the session is used to check the database for (and
@@ -206,10 +207,39 @@ def load(cls, config, session=None):
 
     @classmethod
     def _from_schema(cls, schema_version=None, session=None):
-        """Load config from BIDS schema - store patterns directly."""
+        """Load config from BIDS schema.
+        
+        Parameters
+        ----------
+        schema_version : str, optional
+            Specific BIDS specification version to load (e.g., "1.9.0", "1.8.0").
+            If None, uses the schema version bundled with bidsschematools.
+        session : :obj:`sqlalchemy.orm.session.Session` or None
+            An optional SQLAlchemy Session instance.
+            
+        Returns
+        -------
+        Config
+            A Config instance populated with entities and patterns
+            extracted from the BIDS schema.
+        """
         from bidsschematools import rules, schema as bidsschema
+        from upath import UPath
         
-        bids_schema = bidsschema.load_schema()
+        # Load schema - either default or specific version
+        if schema_version is None:
+            bids_schema = bidsschema.load_schema()
+        else:
+            # Load specific schema version from versioned URL
+            schema_url = UPath(f"https://bids-specification.readthedocs.io/en/v{schema_version}/schema.json")
+            try:
+                bids_schema = bidsschema.load_schema(schema_url)
+            except Exception as e:
+                raise ValueError(
+                    f"Failed to load BIDS schema version {schema_version}. "
+                    f"Check that the version exists at {schema_url}. "
+                    f"Error: {e}"
+                )
         
         # Collect ALL patterns from bidsschematools (keep existing collection logic)
         all_patterns = []
diff --git a/src/bids/layout/tests/test_config_compatibility.py b/src/bids/layout/tests/test_config_compatibility.py
index b9904f04..f47b3d4a 100644
--- a/src/bids/layout/tests/test_config_compatibility.py
+++ b/src/bids/layout/tests/test_config_compatibility.py
@@ -45,37 +45,6 @@ def test_schema_entities_are_superset_of_valid_old_entities(self):
         
         assert len(missing_from_new) == 0, f"Schema missing valid entities: {missing_from_new}"
     
-    def test_suffix_coverage_matches(self):
-        """Test that suffix patterns cover the same suffixes."""
-        #pytest.skip("Old config uses generic catch-all pattern while new config uses explicit schema suffixes. "
-        #            "Cannot meaningfully compare pattern extraction. See test_new_suffixes_compatible_with_old_pattern instead.")
-        
-        # Original implementation preserved for reference
-        old_config = Config.load('bids')
-        new_config = Config.load('bids-schema')
-        
-        # Extract suffixes from patterns
-        old_suffixes = self._extract_suffixes_from_pattern(old_config.entities['suffix'].pattern)
-        new_suffixes = self._extract_suffixes_from_pattern(new_config.entities['suffix'].pattern)
-        
-        missing = old_suffixes - new_suffixes
-        extra = new_suffixes - old_suffixes
-        
-        error_msg = []
-        if missing:
-            error_msg.append(f"Missing suffixes: {sorted(missing)}")
-        if extra:
-            error_msg.append(f"Extra suffixes: {sorted(extra)}")
-        
-        # Core BIDS suffixes should be preserved
-        core_suffixes = {'bold', 'T1w', 'T2w', 'dwi', 'events'}
-        assert core_suffixes.issubset(new_suffixes), f"Missing core suffixes; {'; '.join(error_msg)}"
-        
-        # Report coverage for analysis
-        coverage = len(old_suffixes & new_suffixes) / len(old_suffixes) if old_suffixes else 0
-        if coverage < 1.0 or missing or extra:
-            pytest.fail(f"Suffix coverage: {coverage:.1%}; {'; '.join(error_msg)}")
-    
     def test_new_suffixes_compatible_with_old_pattern(self):
         """Test that all new schema suffixes would be accepted by old generic pattern."""
         old_config = Config.load('bids')
@@ -94,37 +63,6 @@ def test_new_suffixes_compatible_with_old_pattern(self):
         
         assert len(incompatible_suffixes) == 0, f"Old pattern would reject these valid schema suffixes: {incompatible_suffixes}"
     
-    def test_extension_coverage_matches(self):
-        """Test that extension patterns cover the same extensions."""
-        #pytest.skip("Old config uses generic catch-all pattern while new config uses explicit schema extensions. "
-        #            "Cannot meaningfully compare pattern extraction. See test_new_extensions_compatible_with_old_pattern instead.")
-        
-        # Original implementation preserved for reference
-        old_config = Config.load('bids')
-        new_config = Config.load('bids-schema')
-        
-        # Extract extensions from patterns
-        old_extensions = self._extract_extensions_from_pattern(old_config.entities['extension'].pattern)
-        new_extensions = self._extract_extensions_from_pattern(new_config.entities['extension'].pattern)
-        
-        missing = old_extensions - new_extensions
-        extra = new_extensions - old_extensions
-        
-        error_msg = []
-        if missing:
-            error_msg.append(f"Missing extensions: {sorted(missing)}")
-        if extra:
-            error_msg.append(f"Extra extensions: {sorted(extra)}")
-        
-        # Core extensions should be preserved
-        core_extensions = {'.nii.gz', '.nii', '.tsv', '.json'}
-        assert core_extensions.issubset(new_extensions), f"Missing core extensions; {'; '.join(error_msg)}"
-        
-        # Report coverage for analysis
-        coverage = len(old_extensions & new_extensions) / len(old_extensions) if old_extensions else 0
-        if coverage < 1.0 or missing or extra:
-            pytest.fail(f"Extension coverage: {coverage:.1%}; {'; '.join(error_msg)}")
-    
     def test_new_extensions_compatible_with_old_pattern(self):
         """Test that all new schema extensions would be accepted by old generic pattern."""
         old_config = Config.load('bids')
diff --git a/src/bids/layout/tests/test_schema_config.py b/src/bids/layout/tests/test_schema_config.py
index 088d456d..21abd249 100644
--- a/src/bids/layout/tests/test_schema_config.py
+++ b/src/bids/layout/tests/test_schema_config.py
@@ -58,12 +58,15 @@ def test_schema_version_tracking(self):
         assert len(version_part) > 0
     
     def test_schema_version_parameter(self):
-        """Test loading specific schema version (future feature)."""
-        # This should work but use the default schema for now
+        """Test loading specific schema version."""
+        # Test loading BIDS v1.9.0 schema
         config = Config.load({'schema_version': '1.9.0'})
         
         assert config.name.startswith('bids-schema-')
         assert len(config.entities) > 0
+        # Should include the version in the name
+        assert '1.9.0' in config.name
+    
     
     
     def test_entity_generation(self):
@@ -113,4 +116,5 @@ def test_entity_generation(self):
     test_instance.test_load_bids_schema_basic()
     test_instance.test_schema_entity_patterns()
     test_instance.test_schema_version_tracking()
+    test_instance.test_schema_version_parameter()
     print("Basic tests passed!")
\ No newline at end of file
diff --git a/src/bids/layout/tests/test_schema_version_differences.py b/src/bids/layout/tests/test_schema_version_differences.py
new file mode 100644
index 00000000..37da8cf7
--- /dev/null
+++ b/src/bids/layout/tests/test_schema_version_differences.py
@@ -0,0 +1,100 @@
+"""Tests for differences between BIDS schema versions."""
+
+import pytest
+import tempfile
+import json
+import os
+from pathlib import Path
+
+from bids import BIDSLayout
+from bids.layout.models import Config
+
+
+class TestSchemaVersionDifferences:
+    """Test that different schema versions handle different features correctly."""
+    
+    def test_entity_parsing_version_differences(self):
+        """Test that tracksys entity (added in v1.9.0) works in newer schemas but fails in v1.8.0."""
+        
+        with tempfile.TemporaryDirectory() as temp_dir:
+            dataset_dir = Path(temp_dir) / "test_dataset"
+            dataset_dir.mkdir()
+            
+            # Create minimal dataset with motion file using tracksys entity
+            with open(dataset_dir / "dataset_description.json", "w") as f:
+                json.dump({"Name": "Test", "BIDSVersion": "1.10.1", "Authors": ["Test"]}, f)
+            
+            motion_dir = dataset_dir / "sub-01" / "motion"
+            motion_dir.mkdir(parents=True)
+            (motion_dir / "sub-01_tracksys-imu_motion.tsv").touch()
+            
+            # Test tracksys query across schema versions
+            def test_tracksys_query(config):
+                layout = BIDSLayout(dataset_dir, config=[config])
+                try:
+                    return len(layout.get(tracksys='imu', return_type='filename')) > 0
+                except Exception:
+                    return False
+            
+            # tracksys entity was added in v1.9.0 for motion datatype
+            assert test_tracksys_query('bids-schema'), "Current schema should support tracksys"
+            assert test_tracksys_query({'schema_version': '1.9.0'}), "v1.9.0 should support tracksys (motion added)"
+            assert not test_tracksys_query({'schema_version': '1.8.0'}), "v1.8.0 should NOT support tracksys"
+    
+
+    def test_motion_datatype_evolution(self):
+        """Test that motion datatype (BEP029) support was added in v1.9.0."""
+        
+        # Motion datatype was added in v1.9.0 according to changelog:
+        # v1.9.0: [ENH] Extend BIDS for Motion data (BEP029) #981
+        
+        config_v190 = Config.load({'schema_version': '1.9.0'})
+        config_v180 = Config.load({'schema_version': '1.8.0'})
+        
+        entities_v190 = {e.name for e in config_v190.entities.values()}
+        entities_v180 = {e.name for e in config_v180.entities.values()}
+        
+        # Motion-specific entity 'tracksys' should be in v1.9.0+ but not v1.8.0
+        assert 'tracksys' in entities_v190, "tracksys entity should exist in v1.9.0 (motion datatype was added)"
+        assert 'tracksys' not in entities_v180, "tracksys entity should NOT exist in v1.8.0 (before motion datatype)"
+        
+        print(f"✓ Motion datatype evolution verified:")
+        print(f"  v1.8.0 (before motion): tracksys = {'tracksys' in entities_v180}")
+        print(f"  v1.9.0 (motion added): tracksys = {'tracksys' in entities_v190}")
+        
+        # Test specific motion-related entities that were added
+        motion_entities = {'tracksys'}  # Could expand this list as more motion entities are identified
+        
+        for entity in motion_entities:
+            assert entity in entities_v190, f"Motion entity '{entity}' should exist in v1.9.0+"
+            assert entity not in entities_v180, f"Motion entity '{entity}' should NOT exist in v1.8.0"
+    
+    def test_schema_version_metadata_differences(self):
+        """Test that schema versions have different BIDS version numbers."""
+        
+        # Load different schema versions  
+        config_current = Config.load('bids-schema')
+        config_v190 = Config.load({'schema_version': '1.9.0'})
+        config_v180 = Config.load({'schema_version': '1.8.0'})
+        
+        # Check that config names reflect the versions
+        assert 'bids-schema-' in config_current.name
+        assert '1.9.0' in config_v190.name
+        assert '1.8.0' in config_v180.name
+        
+        print(f"Current config: {config_current.name}")
+        print(f"v1.9.0 config: {config_v190.name}")
+        print(f"v1.8.0 config: {config_v180.name}")
+        
+        # Verify they're different configurations
+        assert config_current.name != config_v190.name
+        assert config_v190.name != config_v180.name
+
+
+if __name__ == '__main__':
+    # Allow running as script for quick testing
+    test_instance = TestSchemaVersionDifferences()
+    test_instance.test_schema_version_entity_differences()
+    test_instance.test_motion_datatype_evolution()
+    test_instance.test_schema_version_metadata_differences()
+    print("Schema version difference tests completed!")
\ No newline at end of file

From b051698f18311c968e097d4899f7d7bfe8a956ce Mon Sep 17 00:00:00 2001
From: Ashley Stewart <a.stewart.au@gmail.com>
Date: Tue, 9 Sep 2025 16:12:58 +1000
Subject: [PATCH 03/10] update names

---
 ...{test_schema_accuracy.py => test_schema_pattern_validation.py} | 0
 ...test_config_compatibility.py => test_schema_vs_json_config.py} | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 rename src/bids/layout/tests/{test_schema_accuracy.py => test_schema_pattern_validation.py} (100%)
 rename src/bids/layout/tests/{test_config_compatibility.py => test_schema_vs_json_config.py} (100%)

diff --git a/src/bids/layout/tests/test_schema_accuracy.py b/src/bids/layout/tests/test_schema_pattern_validation.py
similarity index 100%
rename from src/bids/layout/tests/test_schema_accuracy.py
rename to src/bids/layout/tests/test_schema_pattern_validation.py
diff --git a/src/bids/layout/tests/test_config_compatibility.py b/src/bids/layout/tests/test_schema_vs_json_config.py
similarity index 100%
rename from src/bids/layout/tests/test_config_compatibility.py
rename to src/bids/layout/tests/test_schema_vs_json_config.py

From fa01063242cc7d614a4c5ed37dade64a3508938d Mon Sep 17 00:00:00 2001
From: Ashley Stewart <a.stewart.au@gmail.com>
Date: Tue, 9 Sep 2025 16:40:39 +1000
Subject: [PATCH 04/10] Remove problematic regex parsing code

---
 src/bids/layout/models.py | 187 +++++++++-----------------------------
 1 file changed, 42 insertions(+), 145 deletions(-)

diff --git a/src/bids/layout/models.py b/src/bids/layout/models.py
index d2d1a89c..272732d8 100644
--- a/src/bids/layout/models.py
+++ b/src/bids/layout/models.py
@@ -305,7 +305,7 @@ def _extract_entity_names_from_rules(cls, bids_schema):
     
     @classmethod
     def _create_entities_from_schema(cls, entity_names, entity_values, bids_schema):
-        """Create Entity objects from schema information."""
+        """Create Entity objects directly from schema information."""
         entities = []
         
         for entity_name in sorted(entity_names):
@@ -316,11 +316,11 @@ def _create_entities_from_schema(cls, entity_names, entity_values, bids_schema):
             special_entities = {'suffix', 'extension', 'datatype'}
             
             if entity_name in special_entities:
-                # These entities don't have prefixes - they appear directly
+                # These entities use explicit value lists from schema rules
                 if entity_name in entity_values and entity_values[entity_name]:
                     # Filter out problematic values for extension entity
                     if entity_name == 'extension':
-                        # Remove empty string and wildcard patterns that would match everything
+                        # Remove empty string and wildcard patterns
                         filtered_values = {v for v in entity_values[entity_name] 
                                          if v and v not in ('.*', '**', '*')}
                         values = '|'.join(sorted(filtered_values))
@@ -333,20 +333,31 @@ def _create_entities_from_schema(cls, entity_names, entity_values, bids_schema):
                         pattern = fr'/({values})/'  # Datatypes are directory names  
                     else:  # suffix
                         pattern = fr'[_/\\\\]({values})\.?'  # Before extension, with optional dot
+                else:
+                    # Fallback for special entities without values
+                    if entity_name == 'extension':
+                        pattern = fr'(\.[^/\\\\]+)\Z'
+                    elif entity_name == 'datatype':
+                        pattern = fr'/([a-z]+)/'
+                    else:  # suffix
+                        pattern = fr'[_/\\\\]([a-zA-Z0-9]+)\.?'
             else:
-                # Regular entities - use schema format patterns directly!
+                # Regular BIDS entities - use schema format patterns directly
                 bids_prefix = entity_spec.get('name', entity_name)
                 format_type = entity_spec.get('format', 'label')
                 
                 # Get the official value pattern from schema formats
+                value_pattern = '[0-9a-zA-Z]+'  # Default pattern
                 if format_type in bids_schema.objects.formats:
                     format_obj = dict(bids_schema.objects.formats[format_type])
-                    value_pattern = format_obj['pattern']  # [0-9a-zA-Z]+, [0-9]+, etc.
+                    value_pattern = format_obj.get('pattern', value_pattern)
                 
-                # Combine prefix + schema format pattern
+                # Determine separator based on entity type
                 if entity_name in ['subject', 'session']:
+                    # Directory-level entities
                     pattern = fr'[/\\\\]+{bids_prefix}-({value_pattern})'
                 else:
+                    # File-level entities
                     pattern = fr'[_/\\\\]+{bids_prefix}-({value_pattern})'
             
             entity_config = {
@@ -392,110 +403,36 @@ def _extract_entity_values_from_rules(cls, bids_schema, entity_names):
     
     
     
-    @staticmethod 
-    def _extract_separator_from_context(entity_name, before_context, after_context, full_pattern, schema):
-        """Extract separator pattern from schema context.
+    @staticmethod
+    def _get_entity_separator_from_schema(entity_name, bids_schema):
+        """Get entity separator pattern directly from schema definition.
         
-        Analyzes the actual schema pattern context to determine what
-        separators are used for this entity by examining the schema regex
-        and entity classification.
+        Uses schema entity definitions and directory rules to determine
+        the correct separator pattern without regex parsing.
         """
-        # Find the entity in the full pattern and extract its actual context
-        entity_match = re.search(rf'(\S*?)(\(\?P<{entity_name}>[^)]+\))', full_pattern)
-        if not entity_match:
-            return f"[_/\\\\]+{entity_name}-"  # Fallback
-        
-        prefix_context = entity_match.group(1)
-        
-        # Determine entity type from schema - is it a real entity or special construct?
-        is_real_entity = entity_name in schema.objects.entities
-        
-        if not is_real_entity:
-            # This is a schema construct (extension/suffix/datatype capturing group)
-            # Analyze the pattern context to determine positioning rules
-            
-            # Analyze from the actual pattern context
-            if '/' in before_context and '/' in after_context:
-                # Appears between directory separators (like datatype)
-                return "[/\\\\]+"
-            elif before_context.endswith(')'):
-                # Comes directly after another capturing group (like extension after suffix)
-                return ""
-            elif '_)?' in before_context or 'chunk)_)?' in before_context:
-                # Comes after underscore separators from entities (like suffix)
-                return "[_/\\\\]+"
-            else:
-                # Analyze position in full pattern to determine context
-                entity_pos = full_pattern.find(f'(?P<{entity_name}>')
-                pattern_before = full_pattern[:entity_pos]
-                
-                # Count directory structure elements before this construct
-                slash_count = pattern_before.count('/')
-                
-                if slash_count <= 2:
-                    # In directory part - use slash separators
-                    return "[/\\\\]+"
-                else:
-                    # In filename part - analyze further
-                    if ')(?P<' in pattern_before[-20:]:
-                        # Directly after another construct
-                        return ""
-                    else:
-                        # After separator
-                        return "[_/\\\\]+"
-        
-        # Extract the actual prefix used in the schema pattern  
-        # Look for patterns like 'sub-(?P<subject>' or '_task-(?P<task>'
-        prefix_pattern = re.search(rf'([/_]?)([a-zA-Z]+)-\(\?P<{entity_name}>', full_pattern)
-        if prefix_pattern:
-            separator_char = prefix_pattern.group(1)
-            prefix = prefix_pattern.group(2)
-            
-            if separator_char == '/':
-                return f"[/\\\\]+{prefix}-"
-            elif separator_char == '_':
-                return f"[_/\\\\]+{prefix}-" 
-            else:  # Direct prefix (no separator before)
-                # Check position to determine separator type
-                entity_pos = full_pattern.find(f'(?P<{entity_name}>')
-                pattern_before = full_pattern[:entity_pos]
-                slash_count = pattern_before.count('/')
-                
-                if slash_count <= 2:  # Directory part
-                    return f"[/\\\\]+{prefix}-"
-                else:  # Filename part  
-                    return f"[_/\\\\]+{prefix}-"
+        # Handle special entities that don't have BIDS prefixes
+        special_entities = {'suffix', 'extension', 'datatype'}
+        if entity_name in special_entities:
+            if entity_name == 'extension':
+                return ''  # Extensions appear at end with no separator
+            elif entity_name == 'datatype':
+                return '/'  # Datatypes are directory names
+            else:  # suffix
+                return '[_/\\\\]+'  # Suffixes have underscore separators
         
-        # Try to extract any entity prefix from before_context
-        prefix_match = re.search(r'([a-zA-Z]+)-$', before_context)
-        if prefix_match:
-            prefix = prefix_match.group(1)
-            # Verify this is a real entity prefix from schema
-            for entity in schema.objects.entities.values():
-                if entity.get('name') == prefix:
-                    if '/' in before_context:
-                        return f"[/\\\\]+{prefix}-"
-                    else:
-                        return f"[_/\\\\]+{prefix}-"
-        
-        # For entities with prefixes, extract from schema pattern
-        broad_match = re.search(rf'([a-zA-Z]+)-\(\?P<{entity_name}>', full_pattern)
-        if broad_match:
-            prefix = broad_match.group(1)
-            
-            # Determine separator type from position in pattern
-            entity_pos = full_pattern.find(f'(?P<{entity_name}>')
-            pattern_before_entity = full_pattern[:entity_pos]
-            slash_count = pattern_before_entity.count('/')
+        # Regular BIDS entities - get info from schema
+        if entity_name not in bids_schema.objects.entities:
+            return f'[_/\\\\]+{entity_name}-'  # Fallback
             
-            if slash_count <= 2:  # Directory part
-                return f"[/\\\\]+{prefix}-"
-            else:  # Filename part
-                return f"[_/\\\\]+{prefix}-"
+        entity_spec = bids_schema.objects.entities[entity_name]
+        bids_prefix = entity_spec.get('name', entity_name)
         
-        # If no prefix pattern found, this entity shouldn't have a prefix
-        # This handles cases where entities appear directly without prefix-
-        return f"[_/\\\\]+{entity_name}-"  # Final fallback
+        # Directory-level entities (subject, session) use slash separators
+        if entity_name in ['subject', 'session']:
+            return f'[/\\\\]+{bids_prefix}-'
+        else:
+            # File-level entities use underscore separators  
+            return f'[_/\\\\]+{bids_prefix}-'
     
     @staticmethod
     def _extract_directory_config(entity_name, pattern, schema):
@@ -541,46 +478,6 @@ def _extract_directory_config(entity_name, pattern, schema):
         
         return None
 
-    @staticmethod
-    def _entity_appears_in_all_modalities(entity_name, schema):
-        """Check if an entity appears in ALL modalities in the schema.
-        
-        Entities that appear in all modalities (like suffix, extension) should have
-        their values combined across all modalities to ensure complete coverage.
-        
-        Parameters
-        ----------
-        entity_name : str
-            The entity name to check
-        schema : object
-            The BIDS schema object
-            
-        Returns
-        -------
-        bool
-            True if entity appears in all modalities, False otherwise
-        """
-        # Get all modalities
-        all_modalities = list(schema.rules.files.raw.keys())
-        if not all_modalities:
-            return False
-            
-        modalities_with_entity = set()
-        
-        # Check each modality for this entity
-        for modality in all_modalities:
-            datatype_rules = getattr(schema.rules.files.raw, modality)
-            patterns = rules.regexify_filename_rules(datatype_rules, schema, level=1)
-            
-            # Check if entity appears in any pattern for this modality
-            for pattern in patterns:
-                entities_in_pattern = re.findall(r'\(\?P<([^>]+)>', pattern['regex'])
-                if entity_name in entities_in_pattern:
-                    modalities_with_entity.add(modality)
-                    break  # Found in this modality, move to next
-        
-        # Return True if entity appears in ALL modalities
-        return len(modalities_with_entity) == len(all_modalities)
 
 
 

From fd07a267cfee5db6abd5b580ef46709359d7b0a6 Mon Sep 17 00:00:00 2001
From: Ashley Stewart <a.stewart.au@gmail.com>
Date: Tue, 9 Sep 2025 16:44:20 +1000
Subject: [PATCH 05/10] Remove unnecessary fallbacks

---
 src/bids/layout/layout.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/src/bids/layout/layout.py b/src/bids/layout/layout.py
index 73b5b1e8..d447bce2 100644
--- a/src/bids/layout/layout.py
+++ b/src/bids/layout/layout.py
@@ -422,9 +422,6 @@ def _find_directories_for_entity_using_schema(self, entity_name, results):
         
         # Load schema directory rules
         bids_schema = bst_schema.load_schema()
-        if not hasattr(bids_schema.rules, 'directories') or not hasattr(bids_schema.rules.directories, 'raw'):
-            # Fallback to simple approach if no schema rules
-            return [f.dirname for f in results if entity_name in f.entities]
         
         directory_rules = bids_schema.rules.directories.raw
         directories = set()
@@ -438,9 +435,6 @@ def _find_directories_for_entity_using_schema(self, entity_name, results):
                     # Convert to full directory path
                     directory_path = os.path.join(self.root, *path_components)
                     directories.add(directory_path)
-                else:
-                    # Fallback to actual path if schema reconstruction fails
-                    directories.add(f.dirname)
         
         return list(directories)
     

From 14098637360c2145c2e352fbe36604f21eeb2845 Mon Sep 17 00:00:00 2001
From: Ashley Stewart <a.stewart.au@gmail.com>
Date: Tue, 9 Sep 2025 16:50:32 +1000
Subject: [PATCH 06/10] Remove unnecessary fallback

---
 src/bids/layout/models.py | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/src/bids/layout/models.py b/src/bids/layout/models.py
index 272732d8..21636631 100644
--- a/src/bids/layout/models.py
+++ b/src/bids/layout/models.py
@@ -264,7 +264,7 @@ def _from_schema(cls, schema_version=None, session=None):
         # Store patterns directly - NO PARSING EVER
         config_name = f'bids-schema-{bids_schema.bids_version}'
         
-        # Extract entities directly from schema rules - no regex parsing!
+        # Extract entities directly from schema rules
         entity_names = cls._extract_entity_names_from_rules(bids_schema)
         
         # Extract actual values for key entities from schema rules directly
@@ -279,7 +279,7 @@ def _from_schema(cls, schema_version=None, session=None):
     
     @classmethod
     def _extract_entity_names_from_rules(cls, bids_schema):
-        """Extract entity names directly from schema rules - no regex parsing!"""
+        """Extract entity names directly from schema rules"""
         
         # Get entity names from all rule sections
         entity_names = set()
@@ -333,14 +333,6 @@ def _create_entities_from_schema(cls, entity_names, entity_values, bids_schema):
                         pattern = fr'/({values})/'  # Datatypes are directory names  
                     else:  # suffix
                         pattern = fr'[_/\\\\]({values})\.?'  # Before extension, with optional dot
-                else:
-                    # Fallback for special entities without values
-                    if entity_name == 'extension':
-                        pattern = fr'(\.[^/\\\\]+)\Z'
-                    elif entity_name == 'datatype':
-                        pattern = fr'/([a-z]+)/'
-                    else:  # suffix
-                        pattern = fr'[_/\\\\]([a-zA-Z0-9]+)\.?'
             else:
                 # Regular BIDS entities - use schema format patterns directly
                 bids_prefix = entity_spec.get('name', entity_name)
@@ -372,7 +364,7 @@ def _create_entities_from_schema(cls, entity_names, entity_values, bids_schema):
     
     @classmethod
     def _extract_entity_values_from_rules(cls, bids_schema, entity_names):
-        """Extract entity values directly from schema rules - no regex parsing!"""
+        """Extract entity values directly from schema rules"""
         
         entity_values = {name: set() for name in entity_names}
         file_sections = {
@@ -408,7 +400,7 @@ def _get_entity_separator_from_schema(entity_name, bids_schema):
         """Get entity separator pattern directly from schema definition.
         
         Uses schema entity definitions and directory rules to determine
-        the correct separator pattern without regex parsing.
+        the correct separator pattern.
         """
         # Handle special entities that don't have BIDS prefixes
         special_entities = {'suffix', 'extension', 'datatype'}

From d44bb7b875c0bc190e899ca4c4010f740e8a8040 Mon Sep 17 00:00:00 2001
From: "Christopher J. Markiewicz" <markiewicz@stanford.edu>
Date: Tue, 23 Sep 2025 13:06:45 -0400
Subject: [PATCH 07/10] [DATALAD RUNCMD] uv lock

=== Do not change lines below ===
{
 "chain": [],
 "cmd": "uv lock",
 "exit": 0,
 "extra_inputs": [],
 "inputs": [
  "pyproject.toml"
 ],
 "outputs": [
  "uv.lock"
 ],
 "pwd": "."
}
^^^ Do not change lines above ^^^
---
 uv.lock | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/uv.lock b/uv.lock
index 3e75fc38..f1d80117 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2289,6 +2289,7 @@ name = "pybids"
 source = { editable = "." }
 dependencies = [
     { name = "bids-validator" },
+    { name = "bidsschematools" },
     { name = "click", version = "8.1.8", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
     { name = "click", version = "8.3.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" },
     { name = "formulaic" },
@@ -2389,6 +2390,7 @@ requires-dist = [
     { name = "altair", marker = "extra == 'model-reports'" },
     { name = "altair", marker = "extra == 'test'", specifier = ">=5" },
     { name = "bids-validator", specifier = ">=1.14.7" },
+    { name = "bidsschematools", specifier = ">=1.1.0" },
     { name = "bsmschema", marker = "extra == 'test'", specifier = ">=0.1" },
     { name = "click", specifier = ">=8.0" },
     { name = "coverage", extras = ["toml"], marker = "extra == 'test'", specifier = ">=5.2.1" },

From 71c003b3f6d4609afe57d1ab2f5e1d4700bb8e27 Mon Sep 17 00:00:00 2001
From: Ashley Stewart <a.stewart.au@gmail.com>
Date: Thu, 30 Oct 2025 09:46:46 +1000
Subject: [PATCH 08/10] Remove unused functions and add tests

---
 src/bids/layout/models.py                   | 77 ---------------------
 src/bids/layout/tests/test_schema_config.py | 49 ++++++++++---
 2 files changed, 38 insertions(+), 88 deletions(-)

diff --git a/src/bids/layout/models.py b/src/bids/layout/models.py
index 21636631..49ad1098 100644
--- a/src/bids/layout/models.py
+++ b/src/bids/layout/models.py
@@ -392,83 +392,6 @@ def _extract_entity_values_from_rules(cls, bids_schema, entity_names):
                     # They use generic patterns like [0-9a-zA-Z]+ 
         
         return entity_values
-    
-    
-    
-    @staticmethod
-    def _get_entity_separator_from_schema(entity_name, bids_schema):
-        """Get entity separator pattern directly from schema definition.
-        
-        Uses schema entity definitions and directory rules to determine
-        the correct separator pattern.
-        """
-        # Handle special entities that don't have BIDS prefixes
-        special_entities = {'suffix', 'extension', 'datatype'}
-        if entity_name in special_entities:
-            if entity_name == 'extension':
-                return ''  # Extensions appear at end with no separator
-            elif entity_name == 'datatype':
-                return '/'  # Datatypes are directory names
-            else:  # suffix
-                return '[_/\\\\]+'  # Suffixes have underscore separators
-        
-        # Regular BIDS entities - get info from schema
-        if entity_name not in bids_schema.objects.entities:
-            return f'[_/\\\\]+{entity_name}-'  # Fallback
-            
-        entity_spec = bids_schema.objects.entities[entity_name]
-        bids_prefix = entity_spec.get('name', entity_name)
-        
-        # Directory-level entities (subject, session) use slash separators
-        if entity_name in ['subject', 'session']:
-            return f'[/\\\\]+{bids_prefix}-'
-        else:
-            # File-level entities use underscore separators  
-            return f'[_/\\\\]+{bids_prefix}-'
-    
-    @staticmethod
-    def _extract_directory_config(entity_name, pattern, schema):
-        """Extract directory configuration from schema directory rules."""
-        
-        # Get directory rules from schema
-        if not hasattr(schema.rules, 'directories') or not hasattr(schema.rules.directories, 'raw'):
-            return None
-            
-        dir_rules = schema.rules.directories.raw
-        
-        # Check if this entity has a directory rule
-        for rule_name, rule in dir_rules.items():
-            rule_dict = dict(rule)
-            if rule_dict.get('entity') == entity_name:
-                # This entity creates directories
-                
-                # Build directory template based on schema hierarchy
-                # Find parent entities by checking what directories can contain this entity
-                parent_entities = []
-                
-                # Check all directory rules to see which ones list this entity in subdirs
-                for parent_rule_name, parent_rule in dir_rules.items():
-                    parent_rule_dict = dict(parent_rule)
-                    subdirs = parent_rule_dict.get('subdirs', [])
-                    
-                    # Check if this entity is in parent's subdirs
-                    for subdir in subdirs:
-                        if isinstance(subdir, dict) and 'oneOf' in subdir:
-                            if entity_name in subdir['oneOf']:
-                                if parent_rule_dict.get('entity'):
-                                    parent_entities.append(parent_rule_dict['entity'])
-                        elif subdir == entity_name:
-                            if parent_rule_dict.get('entity'):
-                                parent_entities.append(parent_rule_dict['entity'])
-                
-                # Build template with parent hierarchy
-                template_parts = []
-                template_parts.extend([f'{{{parent}}}' for parent in parent_entities])
-                template_parts.append(f'{{{entity_name}}}')
-                
-                return ''.join(template_parts)
-        
-        return None
 
 
 
diff --git a/src/bids/layout/tests/test_schema_config.py b/src/bids/layout/tests/test_schema_config.py
index 21abd249..b8165276 100644
--- a/src/bids/layout/tests/test_schema_config.py
+++ b/src/bids/layout/tests/test_schema_config.py
@@ -61,53 +61,80 @@ def test_schema_version_parameter(self):
         """Test loading specific schema version."""
         # Test loading BIDS v1.9.0 schema
         config = Config.load({'schema_version': '1.9.0'})
-        
+
         assert config.name.startswith('bids-schema-')
         assert len(config.entities) > 0
         # Should include the version in the name
         assert '1.9.0' in config.name
-    
+
+    def test_invalid_schema_version(self):
+        """Test that loading invalid schema version raises appropriate error."""
+        # Test with a non-existent version
+        with pytest.raises(ValueError, match="Failed to load BIDS schema version"):
+            Config.load({'schema_version': '99.99.99'})
+
     
     
     def test_entity_generation(self):
         """Test entity pattern generation using real schema."""
         config = Config.load('bids-schema')
-        
+
         # Test that entities were generated
         assert len(config.entities) > 0
-        
+
         # Test specific entity properties we care about (using full names)
         assert 'subject' in config.entities
         assert 'task' in config.entities
         assert 'run' in config.entities
-        
+
         # Test that patterns are valid regex
         for entity_name, entity in config.entities.items():
             assert entity.regex is not None, f"Entity {entity_name} missing regex"
             # Test that regex compiles and works
             assert hasattr(entity.regex, 'search'), f"Entity {entity_name} regex invalid"
-        
+
         # Test specific entity patterns work correctly
         subject_entity = config.entities['subject']
         assert 'sub-' in subject_entity.pattern
-        
+
         # Test subject pattern matches expected format
         match = subject_entity.regex.search('/sub-01/')
         assert match is not None
         assert match.group(1) == '01'
-        
-        # Test task pattern matches expected format  
+
+        # Test task pattern matches expected format
         task_entity = config.entities['task']
         match = task_entity.regex.search('_task-rest_')
         assert match is not None
         assert match.group(1) == 'rest'
-        
+
         # Test run pattern matches expected format
         run_entity = config.entities['run']
         match = run_entity.regex.search('_run-02_')
         assert match is not None
         assert match.group(1) == '02'
-    
+
+    def test_schema_directory_query(self):
+        """Test directory queries with schema-based config."""
+        from bids import BIDSLayout
+        from pathlib import Path
+        import os
+
+        # Use the 7t_trt test dataset
+        data_dir = Path(__file__).parent.parent.parent.parent.parent / 'tests' / 'data' / '7t_trt'
+        if not data_dir.exists():
+            pytest.skip("Test dataset not available")
+
+        # Create layout with schema config
+        layout = BIDSLayout(data_dir, config='bids-schema', derivatives=False)
+
+        # Test directory query for subjects
+        subject_dirs = layout.get(target='subject', return_type='dir')
+        assert len(subject_dirs) > 0
+        assert all(os.path.exists(d) for d in subject_dirs)
+        # Should include subject directories
+        assert any('sub-' in os.path.basename(d) for d in subject_dirs)
+
 
 
 if __name__ == '__main__':

From e2f8d9b979d32cd5d10b844a92eb6e7781acbb4e Mon Sep 17 00:00:00 2001
From: Ashley Stewart <a.stewart.au@gmail.com>
Date: Thu, 30 Oct 2025 10:50:57 +1000
Subject: [PATCH 09/10] Add tests to improve coverage

---
 src/bids/layout/layout.py            | 10 +++---
 src/bids/layout/tests/test_layout.py | 54 ++++++++++++++++++++++++++++
 2 files changed, 60 insertions(+), 4 deletions(-)

diff --git a/src/bids/layout/layout.py b/src/bids/layout/layout.py
index d447bce2..3ffad96b 100644
--- a/src/bids/layout/layout.py
+++ b/src/bids/layout/layout.py
@@ -446,12 +446,13 @@ def _build_path_from_schema_rules(self, entities, directory_rules):
         # Traverse schema directory hierarchy
         while current_level:
             rule = directory_rules.get(current_level)
-            if not rule:
+            if not rule:  # pragma: no cover
+                # Schema always provides valid rules; current_level only set from schema subdirs
                 break
-                
+
             rule_dict = dict(rule)
             rule_entity = rule_dict.get('entity')
-            
+
             # If this rule corresponds to an entity, add directory component
             if rule_entity and rule_entity in entities:
                 entity_value = entities[rule_entity]
@@ -459,7 +460,8 @@ def _build_path_from_schema_rules(self, entities, directory_rules):
                     path_components.append(f'sub-{entity_value}')
                 elif rule_entity == 'session':
                     path_components.append(f'ses-{entity_value}')
-                else:
+                else:  # pragma: no cover
+                    # BIDS schema only defines subject/session entity directories currently
                     path_components.append(str(entity_value))
             elif current_level == 'datatype' and 'datatype' in entities:
                 # Special case: datatype rule has no entity field but we use the datatype value
diff --git a/src/bids/layout/tests/test_layout.py b/src/bids/layout/tests/test_layout.py
index a9d99085..67df7ccb 100644
--- a/src/bids/layout/tests/test_layout.py
+++ b/src/bids/layout/tests/test_layout.py
@@ -1214,3 +1214,57 @@ def test_intended_for(temporary_dataset, intent):
 
     layout = BIDSLayout(temporary_dataset)
     assert layout.get_IntendedFor(subject='01')
+
+
+def test_get_return_type_dir_with_schema_datatype(layout_7t_trt):
+    """Test return_type='dir' with schema config for datatype entity (covers line 463)."""
+    # Create a schema-based layout
+    layout = BIDSLayout(layout_7t_trt.root, config='bids-schema', derivatives=False)
+
+    # Query for datatype directories - this should trigger the code path
+    # that handles non-subject/non-session entities (line 463)
+    dirs = layout.get(target='datatype', return_type='dir')
+
+    # Should return actual datatype directories
+    assert len(dirs) > 0
+    # Check that these are actual directories in the dataset
+    for d in dirs:
+        assert os.path.exists(d)
+
+
+def test_get_return_type_dir_with_legacy_config_no_template():
+    """Test return_type='dir' with legacy config missing directory template (covers line 842)."""
+    # Create a minimal config without directory templates
+    config_dict = {
+        'name': 'test_config',
+        'entities': [
+            {
+                'name': 'subject',
+                'pattern': '[/\\\\]sub-([a-zA-Z0-9]+)',
+                'directory': '/sub-{subject}/'  # This has a template
+            },
+            {
+                'name': 'task',
+                'pattern': '_task-([a-zA-Z0-9]+)',
+                # No directory template - this should trigger the error
+            }
+        ]
+    }
+
+    # Create temporary test dataset
+    import tempfile
+    with tempfile.TemporaryDirectory() as tmpdir:
+        # Create minimal BIDS structure
+        Path(tmpdir).joinpath('dataset_description.json').write_text('{"Name": "test", "BIDSVersion": "1.6.0"}')
+        sub_dir = Path(tmpdir) / 'sub-01'
+        sub_dir.mkdir()
+        test_file = sub_dir / 'sub-01_task-rest_bold.nii.gz'
+        test_file.touch()
+
+        # Load with our custom config
+        layout = BIDSLayout(tmpdir, config=config_dict, validate=False)
+
+        # This should raise ValueError when trying to get directories for 'task'
+        # because task entity has no directory template
+        with pytest.raises(ValueError, match='Return type set to directory'):
+            layout.get(target='task', return_type='dir')

From e3e6108cec730ee8c02bb96bbab2a2ba12e90c6c Mon Sep 17 00:00:00 2001
From: Ashley Stewart <a.stewart.au@gmail.com>
Date: Thu, 30 Oct 2025 12:18:08 +1000
Subject: [PATCH 10/10] Remove unnecessary test execution code

---
 src/bids/layout/tests/test_schema_config.py            | 10 ----------
 .../layout/tests/test_schema_pattern_validation.py     | 10 ----------
 .../layout/tests/test_schema_version_differences.py    |  8 --------
 src/bids/layout/tests/test_schema_vs_json_config.py    |  7 -------
 4 files changed, 35 deletions(-)

diff --git a/src/bids/layout/tests/test_schema_config.py b/src/bids/layout/tests/test_schema_config.py
index b8165276..42feef50 100644
--- a/src/bids/layout/tests/test_schema_config.py
+++ b/src/bids/layout/tests/test_schema_config.py
@@ -135,13 +135,3 @@ def test_schema_directory_query(self):
         # Should include subject directories
         assert any('sub-' in os.path.basename(d) for d in subject_dirs)
 
-
-
-if __name__ == '__main__':
-    # Allow running as script for quick testing
-    test_instance = TestSchemaConfig()
-    test_instance.test_load_bids_schema_basic()
-    test_instance.test_schema_entity_patterns()
-    test_instance.test_schema_version_tracking()
-    test_instance.test_schema_version_parameter()
-    print("Basic tests passed!")
\ No newline at end of file
diff --git a/src/bids/layout/tests/test_schema_pattern_validation.py b/src/bids/layout/tests/test_schema_pattern_validation.py
index ebfef9af..594825ea 100644
--- a/src/bids/layout/tests/test_schema_pattern_validation.py
+++ b/src/bids/layout/tests/test_schema_pattern_validation.py
@@ -185,13 +185,3 @@ def test_no_hardcoded_values(self):
         print(f"Schema datatypes used: {len(bids_schema.objects.datatypes)}")
         print(f"Schema entities used: {len(bids_schema.objects.entities)}")
 
-
-if __name__ == '__main__':
-    # Allow running as script for quick testing
-    test_instance = TestSchemaAccuracy()
-    test_instance.test_extension_patterns_match_schema()
-    test_instance.test_suffix_patterns_match_schema()
-    test_instance.test_datatype_patterns_match_schema()
-    test_instance.test_entity_format_patterns_match_schema()
-    test_instance.test_no_hardcoded_values()
-    print("All schema accuracy tests passed!")
\ No newline at end of file
diff --git a/src/bids/layout/tests/test_schema_version_differences.py b/src/bids/layout/tests/test_schema_version_differences.py
index 37da8cf7..10b9b239 100644
--- a/src/bids/layout/tests/test_schema_version_differences.py
+++ b/src/bids/layout/tests/test_schema_version_differences.py
@@ -90,11 +90,3 @@ def test_schema_version_metadata_differences(self):
         assert config_current.name != config_v190.name
         assert config_v190.name != config_v180.name
 
-
-if __name__ == '__main__':
-    # Allow running as script for quick testing
-    test_instance = TestSchemaVersionDifferences()
-    test_instance.test_schema_version_entity_differences()
-    test_instance.test_motion_datatype_evolution()
-    test_instance.test_schema_version_metadata_differences()
-    print("Schema version difference tests completed!")
\ No newline at end of file
diff --git a/src/bids/layout/tests/test_schema_vs_json_config.py b/src/bids/layout/tests/test_schema_vs_json_config.py
index f47b3d4a..507ea47c 100644
--- a/src/bids/layout/tests/test_schema_vs_json_config.py
+++ b/src/bids/layout/tests/test_schema_vs_json_config.py
@@ -307,10 +307,3 @@ def _map_entity_name_old_to_new(self, new_entity_name):
         }
         return name_mapping.get(new_entity_name, new_entity_name)
 
-
-if __name__ == '__main__':
-    # Allow running as script for quick testing
-    test_instance = TestConfigCompatibility()
-    test_instance.test_entity_coverage_matches()
-    test_instance.test_suffix_coverage_matches()
-    test_instance.test_filename_parsing_compatibility()