From 85495235bf56eab19be576e39cc7f5b6b96b7ca5 Mon Sep 17 00:00:00 2001 From: Austin Macdonald Date: Mon, 16 Mar 2026 11:57:16 -0500 Subject: [PATCH 01/12] Improvements for dev testing - Use base Docker image directly instead of rebuilding for tests - Avoid double overlay/install - Avoid container rebuild for every change - Remove docker build step from test scripts and CI - Fix hardcoded paths to work from any checkout - CI e2e job calls e2e_in_docker.sh directly - script was broken and untested Co-Authored-By: Claude Opus 4.6 (1M context) --- .circleci/config.yml | 29 +++++++---------------------- Dockerfile_testing | 6 ------ docs/developer_how_to_test.rst | 21 +++++++++++++++++++-- tests/e2e_in_docker.sh | 19 +++++++------------ tests/pytest_in_docker.sh | 11 +++++------ 5 files changed, 38 insertions(+), 48 deletions(-) delete mode 100644 Dockerfile_testing diff --git a/.circleci/config.yml b/.circleci/config.yml index 77deed95..12b7d3d0 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -18,18 +18,15 @@ jobs: name: pytest of BABS no_output_timeout: 1h command: | - docker build \ - -t pennlinc/slurm-docker-ci:unstable \ - -f Dockerfile_testing . - - # Make a directory that will hold the test artifacts mkdir -p ${HOME}/e2e-testing docker run -it \ - -v ${PWD}:/tests \ + -v ${PWD}:/babs \ -v ${HOME}/e2e-testing:/test-temp:rw \ + -w /babs \ -h slurmctl --cap-add sys_admin \ --privileged \ - pennlinc/slurm-docker-ci:unstable \ + pennlinc/slurm-docker-ci:0.14 \ + bash -c "pip install -e .[tests] && \ pytest -n 4 -sv \ --durations=0 \ --timeout=300 \ @@ -37,7 +34,7 @@ jobs: --cov-report term-missing \ --cov-report xml:/test-temp/coverage.xml \ --cov=babs \ - /babs + /babs/tests/" - store_test_results: path: /home/circleci/e2e-testing/junit.xml @@ -54,22 +51,10 @@ jobs: - checkout: path: /home/circleci/src/babs - run: - name: pytest of BABS + name: e2e SLURM tests no_output_timeout: 1h command: | - docker build \ - -t pennlinc/slurm-docker-ci:unstable \ - -f Dockerfile_testing . - - # Make a directory that will hold the test artifacts - mkdir -p ${HOME}/e2e-testing - docker run -it \ - -v ${PWD}:/tests \ - -v ${HOME}/e2e-testing:/test-temp:rw \ - -h slurmctl --cap-add sys_admin \ - --privileged \ - pennlinc/slurm-docker-ci:unstable \ - /tests/tests/e2e-slurm/container/walkthrough-tests.sh + E2E_DIR=${HOME}/e2e-testing bash tests/e2e_in_docker.sh - run: name: clean up test artifacts diff --git a/Dockerfile_testing b/Dockerfile_testing deleted file mode 100644 index 4b0cff97..00000000 --- a/Dockerfile_testing +++ /dev/null @@ -1,6 +0,0 @@ -FROM pennlinc/slurm-docker-ci:0.14 - -# install BABS in development mode for proper coverage tracking -COPY . /babs -WORKDIR /babs -RUN pip install -e .[tests] diff --git a/docs/developer_how_to_test.rst b/docs/developer_how_to_test.rst index a3292243..2cd0bf13 100644 --- a/docs/developer_how_to_test.rst +++ b/docs/developer_how_to_test.rst @@ -25,8 +25,25 @@ Manually run pytest The easiest way to run pytest is to run the ``tests/pytest_in_docker.sh`` script from the root directory of BABS. -This will build a docker container that has a running SLURM job scheduler system. -It will also install the local copy of BABS and run the pytests in the container. +This runs the tests inside a Docker container that has a running SLURM job scheduler system, +and installs the local copy of BABS at runtime via volume mount. + +.. code-block:: bash + + bash tests/pytest_in_docker.sh + +To run the end-to-end walkthrough tests (``babs init`` → ``submit`` → ``merge``): + +.. code-block:: bash + + bash tests/e2e_in_docker.sh + +By default, e2e test artifacts go to a temporary directory under ``/tmp``. +To specify a location (e.g., for inspection after the run): + +.. code-block:: bash + + E2E_DIR=/path/to/output bash tests/e2e_in_docker.sh ----------------------------- Automatic pytest via CircleCI diff --git a/tests/e2e_in_docker.sh b/tests/e2e_in_docker.sh index 9de3de14..a571a449 100755 --- a/tests/e2e_in_docker.sh +++ b/tests/e2e_in_docker.sh @@ -1,17 +1,12 @@ #!/bin/bash -mkdir -p "${HOME}"/projects/e2e-testing -docker build --platform linux/amd64 \ - -t pennlinc/slurm-docker-ci:unstable \ - -f Dockerfile_testing . +E2E_DIR="${E2E_DIR:-$(mktemp -d /tmp/babs-e2e-XXXXXX)}" +mkdir -p "${E2E_DIR}" +echo "E2E_DIR=${E2E_DIR}" docker run -it \ --platform linux/amd64 \ - -v "${HOME}"/projects/babs:/tests \ - -v "${HOME}"/projects/e2e-testing:/test-temp:rw \ + -v "$(pwd)":/tests \ + -v "${E2E_DIR}":/test-temp:rw \ -h slurmctl --cap-add sys_admin \ --privileged \ - pennlinc/slurm-docker-ci:unstable #\ - #/babs/tests/e2e-slurm/container/walkthrough-tests.sh - - - #pytest -svx --pdb \ - #/babs/tests \ No newline at end of file + pennlinc/slurm-docker-ci:0.14 \ + /tests/tests/e2e-slurm/container/walkthrough-tests.sh diff --git a/tests/pytest_in_docker.sh b/tests/pytest_in_docker.sh index c8646923..509c3252 100755 --- a/tests/pytest_in_docker.sh +++ b/tests/pytest_in_docker.sh @@ -1,15 +1,14 @@ #!/bin/bash -docker build --platform linux/amd64 -t pennlinc/slurm-docker-ci:unstable -f Dockerfile_testing . docker run -it \ --platform linux/amd64 \ -h slurmctl --cap-add sys_admin \ --privileged \ - -v "${HOME}"/projects/babs:/babs \ - pennlinc/slurm-docker-ci:unstable \ - pytest -svx \ + -v "$(pwd)":/babs \ + -w /babs \ + pennlinc/slurm-docker-ci:0.14 \ + bash -c "pip install -e .[tests] && pytest -svx \ --cov-report=term-missing \ --cov-report=xml \ --cov=babs \ --pdb \ - /babs/tests/ - \ No newline at end of file + /babs/tests/" From ef2b97ff14c9eb5691afaf4337a711384672b6ef Mon Sep 17 00:00:00 2001 From: Austin Macdonald Date: Mon, 16 Mar 2026 12:30:23 -0500 Subject: [PATCH 02/12] Write test artifacts to tmp_path instead of repo root Tests were writing generated shell scripts to the working directory, which left root-owned files in the checkout when run via Docker. Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/test_generate_bidsapp_runscript.py | 8 ++++---- tests/test_generate_submit_script.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/test_generate_bidsapp_runscript.py b/tests/test_generate_bidsapp_runscript.py index 9c454475..19a34402 100644 --- a/tests/test_generate_bidsapp_runscript.py +++ b/tests/test_generate_bidsapp_runscript.py @@ -112,7 +112,7 @@ def test_get_input_unipping_cmds(): @pytest.mark.parametrize(('input_datasets', 'config_file', 'processing_level'), testing_pairs) -def test_generate_bidsapp_runscript(input_datasets, config_file, processing_level): +def test_generate_bidsapp_runscript(input_datasets, config_file, processing_level, tmp_path): """Test that the bidsapp runscript is generated correctly.""" config_path = NOTEBOOKS_DIR / config_file container_name = config_file.split('_')[1] @@ -130,7 +130,7 @@ def test_generate_bidsapp_runscript(input_datasets, config_file, processing_leve templateflow_home='/path/to/templateflow_home', ) - out_fn = Path('.') / f'{config_path.name}_{processing_level}.sh' + out_fn = tmp_path / f'{config_path.name}_{processing_level}.sh' with open(out_fn, 'w') as f: f.write(script_content) passed, status = run_shellcheck(str(out_fn)) @@ -164,7 +164,7 @@ def run_shellcheck(script_path): return False, str(e) -def test_generate_pipeline_runscript(): +def test_generate_pipeline_runscript(tmp_path): """Test that the pipeline runscript is generated correctly.""" config_path = NOTEBOOKS_DIR / 'eg_nordic-fmriprep_pipeline.yaml' config = read_yaml(config_path) @@ -180,7 +180,7 @@ def test_generate_pipeline_runscript(): final_zip_foldernames=config.get('zip_foldernames', {}), ) - out_fn = Path('.') / f'{config_path.name}_pipeline.sh' + out_fn = tmp_path / f'{config_path.name}_pipeline.sh' with open(out_fn, 'w') as f: f.write(script_content) passed, status = run_shellcheck(str(out_fn)) diff --git a/tests/test_generate_submit_script.py b/tests/test_generate_submit_script.py index 6d0868a3..62184ef0 100644 --- a/tests/test_generate_submit_script.py +++ b/tests/test_generate_submit_script.py @@ -91,7 +91,7 @@ @pytest.mark.parametrize(('input_datasets', 'config_file', 'processing_level'), testing_pairs) -def test_generate_submit_script(input_datasets, config_file, processing_level): +def test_generate_submit_script(input_datasets, config_file, processing_level, tmp_path): """Test that the bidsapp runscript is generated correctly.""" config_path = NOTEBOOKS_DIR / config_file container_name = config_file.split('_')[1] @@ -107,7 +107,7 @@ def test_generate_submit_script(input_datasets, config_file, processing_level): zip_foldernames=config['zip_foldernames'], ) - out_fn = Path('.') / f'participant_job_{config_path.name}_{processing_level}.sh' + out_fn = tmp_path / f'participant_job_{config_path.name}_{processing_level}.sh' with open(out_fn, 'w') as f: f.write(script_content) passed, status = run_shellcheck(str(out_fn)) From c7a8ed54e2441694d84b6694714e9770c8f3c93e Mon Sep 17 00:00:00 2001 From: Austin Macdonald Date: Mon, 16 Mar 2026 12:33:42 -0500 Subject: [PATCH 03/12] Write coverage.xml to /tmp instead of repo root Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/pytest_in_docker.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pytest_in_docker.sh b/tests/pytest_in_docker.sh index 509c3252..4251d601 100755 --- a/tests/pytest_in_docker.sh +++ b/tests/pytest_in_docker.sh @@ -8,7 +8,7 @@ docker run -it \ pennlinc/slurm-docker-ci:0.14 \ bash -c "pip install -e .[tests] && pytest -svx \ --cov-report=term-missing \ - --cov-report=xml \ + --cov-report=xml:/tmp/coverage.xml \ --cov=babs \ --pdb \ /babs/tests/" From 54aff89bee720afe5762a1936f5fa5fc9b73acca Mon Sep 17 00:00:00 2001 From: Austin Macdonald Date: Mon, 16 Mar 2026 14:40:22 -0500 Subject: [PATCH 04/12] Discover container image path via containers-list and register at analysis level Instead of hardcoding the container path as containers/.datalad/environments/{name}/image, use datalad containers-list to discover it from the container dataset, then register it at the analysis level via containers-add. This makes babs work with any container dataset layout. Co-Authored-By: Claude Opus 4.6 (1M context) --- babs/bootstrap.py | 46 +++++++++++++++++++++++++++++++++++++++------- babs/container.py | 7 +++---- 2 files changed, 42 insertions(+), 11 deletions(-) diff --git a/babs/bootstrap.py b/babs/bootstrap.py index 878f91f5..5cba7472 100644 --- a/babs/bootstrap.py +++ b/babs/bootstrap.py @@ -223,10 +223,38 @@ def babs_bootstrap( ) # into `analysis/containers` folder - # Create initial container for sanity check - container = Container(container_ds, container_name, container_config) + # Discover container image path from the container dataset: + containers_path = op.join(self.analysis_path, 'containers') + result = dlapi.containers_list(dataset=containers_path, result_renderer='disabled') + container_info = [r for r in result + if r['action'] == 'containers' and r['name'] == container_name] + if not container_info: + available = [r['name'] for r in result if r['action'] == 'containers'] + raise ValueError( + f"Container '{container_name}' not found in container dataset. " + f"Available: {available}" + ) + image_path_in_ds = op.relpath(container_info[0]['path'], containers_path) + container_image_path = op.join('containers', image_path_in_ds) + + # Build call_fmt from user's singularity_args: + with open(container_config) as f: + user_config = yaml.safe_load(f) + singularity_args = user_config.get('singularity_args', []) + singularity_args_str = ' '.join(singularity_args) if singularity_args else '' + call_fmt = f'singularity run -B $PWD --pwd $PWD {singularity_args_str} {{img}} {{cmd}}' + + # Register container at analysis level so datalad containers-run works: + print(f'\nRegistering container at analysis level: {container_image_path}') + dlapi.containers_add( + dataset=self.analysis_path, + name=container_name, + image=container_image_path, + call_fmt=call_fmt, + ) - # sanity check of container ds: + container = Container(container_ds, container_name, container_config, + container_image_path=container_image_path) container.sanity_check(self.analysis_path) # ============================================================== @@ -250,9 +278,11 @@ def babs_bootstrap( container = containers[0] else: self._bootstrap_single_app_scripts( - container_ds, container_name, container_config, system + container_ds, container_name, container_config, system, + container_image_path=container_image_path, ) - container = Container(container_ds, container_name, container_config) + container = Container(container_ds, container_name, container_config, + container_image_path=container_image_path) # Copy in any other files needed: self._init_import_files(container.config.get('imported_files', [])) @@ -390,10 +420,12 @@ def babs_bootstrap( print('`babs init` was successful!') def _bootstrap_single_app_scripts( - self, container_ds, container_name, container_config, system + self, container_ds, container_name, container_config, system, + container_image_path=None, ): """Bootstrap scripts for single BIDS app configuration.""" - container = Container(container_ds, container_name, container_config) + container = Container(container_ds, container_name, container_config, + container_image_path=container_image_path) # Generate `_zip.sh`: ---------------------------------- # which is a bash script of singularity run + zip diff --git a/babs/container.py b/babs/container.py index f3296286..36cad45b 100644 --- a/babs/container.py +++ b/babs/container.py @@ -14,7 +14,8 @@ class Container: """This class is for the BIDS App Container""" - def __init__(self, container_ds, container_name, config_yaml_file): + def __init__(self, container_ds, container_name, config_yaml_file, + container_image_path=None): """ This is to initialize Container class. @@ -67,9 +68,7 @@ def __init__(self, container_ds, container_name, config_yaml_file): with open(self.config_yaml_file) as f: self.config = yaml.safe_load(f) - self.container_path_relToAnalysis = op.join( - 'containers', '.datalad', 'environments', self.container_name, 'image' - ) + self.container_path_relToAnalysis = container_image_path def sanity_check(self, analysis_path): """ From dd8b44b982c61522c318b994debc65be6d7e4a6d Mon Sep 17 00:00:00 2001 From: Austin Macdonald Date: Mon, 16 Mar 2026 15:44:24 -0500 Subject: [PATCH 05/12] Replace singularity run with datalad containers-run in job template The job template now uses three steps instead of one: 1. datalad containers-run for the BIDS app 2. datalad run for zipping outputs 3. git rm for raw output cleanup (datalad run --explicit doesn't track deletions) Container args parsed via existing bids_app_args_from_config and passed through generate_submit_script to the template. Co-Authored-By: Claude Opus 4.6 (1M context) --- babs/container.py | 25 +++++++++++- babs/generate_submit_script.py | 10 +++++ babs/templates/participant_job.sh.jinja2 | 49 +++++++++++++++--------- 3 files changed, 64 insertions(+), 20 deletions(-) diff --git a/babs/container.py b/babs/container.py index 36cad45b..d5e56664 100644 --- a/babs/container.py +++ b/babs/container.py @@ -6,7 +6,10 @@ import yaml from jinja2 import Environment, PackageLoader, StrictUndefined -from babs.generate_bidsapp_runscript import generate_bidsapp_runscript +from babs.generate_bidsapp_runscript import ( + bids_app_args_from_config, + generate_bidsapp_runscript, +) from babs.generate_submit_script import generate_submit_script, generate_test_submit_script from babs.utils import app_output_settings_from_config @@ -164,16 +167,34 @@ def generate_bash_participant_job( Shown in the script error message when PROJECT_ROOT is unset. """ + input_datasets = input_ds.as_records() + _, bids_app_output_dir = app_output_settings_from_config(self.config) + + raw_bids_app_args = self.config.get('bids_app_args', None) + if raw_bids_app_args: + bids_app_args, subject_selection_flag, _, _, bids_app_input_dir = ( + bids_app_args_from_config(raw_bids_app_args, input_datasets) + ) + else: + bids_app_args = [] + subject_selection_flag = '--participant-label' + bids_app_input_dir = input_datasets[0]['unzipped_path_containing_subject_dirs'] + script_content = generate_submit_script( queue_system=system.type, cluster_resources_config=self.config['cluster_resources'], script_preamble=self.config['script_preamble'], job_scratch_directory=self.config['job_compute_space'], - input_datasets=input_ds.as_records(), + input_datasets=input_datasets, processing_level=processing_level, container_name=self.container_name, zip_foldernames=self.config['zip_foldernames'], project_root=project_root, + container_image_path=self.container_path_relToAnalysis, + bids_app_args=bids_app_args, + bids_app_input_dir=bids_app_input_dir, + bids_app_output_dir=bids_app_output_dir, + subject_selection_flag=subject_selection_flag, ) with open(bash_path, 'w') as f: diff --git a/babs/generate_submit_script.py b/babs/generate_submit_script.py index c586619d..0ed9fd8a 100644 --- a/babs/generate_submit_script.py +++ b/babs/generate_submit_script.py @@ -30,6 +30,11 @@ def generate_submit_script( container_images=None, datalad_run_message=None, project_root=None, + container_image_path=None, + bids_app_args=None, + bids_app_input_dir=None, + bids_app_output_dir=None, + subject_selection_flag=None, ): """ Generate a bash script that runs the BIDS App singularity image. @@ -122,6 +127,11 @@ def generate_submit_script( container_images=container_images, datalad_run_message=datalad_run_message, project_root=project_root, + container_image_path=container_image_path, + bids_app_args=bids_app_args or [], + bids_app_input_dir=bids_app_input_dir or '', + bids_app_output_dir=bids_app_output_dir or '', + subject_selection_flag=subject_selection_flag or '', ) diff --git a/babs/templates/participant_job.sh.jinja2 b/babs/templates/participant_job.sh.jinja2 index 9b4786b6..8a134d0a 100644 --- a/babs/templates/participant_job.sh.jinja2 +++ b/babs/templates/participant_job.sh.jinja2 @@ -107,9 +107,8 @@ fi {{ zip_locator_text }} # Link to shared container image so each job does not re-clone the same image. -# If shared path is not available (e.g. Slurm Docker workers), retrieve image in this clone. -CONTAINER_SHARED="${PROJECT_ROOT}/analysis/containers/.datalad/environments/{{ container_name }}/image" -CONTAINER_JOB="containers/.datalad/environments/{{ container_name }}/image" +CONTAINER_SHARED="${PROJECT_ROOT}/analysis/{{ container_image_path }}" +CONTAINER_JOB="{{ container_image_path }}" if [ ! -L "${CONTAINER_SHARED}" ]; then echo "ERROR: shared container image not found at ${CONTAINER_SHARED}" >&2 @@ -125,35 +124,49 @@ if [ ! -L "${CONTAINER_JOB}" ]; then exit 1 fi -# datalad run: -datalad run \ - -i "{{ run_script_relpath if run_script_relpath else 'code/' + container_name + '_zip.sh' }}" \ +# Step 1: Run BIDS app via containers-run +datalad containers-run \ + -n {{ container_name }} \ + --explicit \ {% for input_dataset in input_datasets %} {% if not input_dataset['is_zipped'] %} - -i "{{ input_dataset['unzipped_path_containing_subject_dirs'] }}/{% raw %}${subid}{% endraw %}{% if processing_level == 'session' %}/{% raw %}${sesid}{% endraw %}{% endif %}" \ - -i "{{ input_dataset['path_in_babs'] }}/dataset_description.json" \ + --input "{{ input_dataset['unzipped_path_containing_subject_dirs'] }}/{% raw %}${subid}{% endraw %}{% if processing_level == 'session' %}/{% raw %}${sesid}{% endraw %}{% endif %}" \ + --input "{{ input_dataset['path_in_babs'] }}/dataset_description.json" \ {% else %} - -i "${%raw%}{{%endraw%}{{ input_dataset['name'].upper() }}_ZIP{%raw%}}{%endraw%}" \ + --input "${%raw%}{{%endraw%}{{ input_dataset['name'].upper() }}_ZIP{%raw%}}{%endraw%}" \ {% endif %} {% endfor %} -{% if container_images %} -{% for image_path in container_images %} - -i "{{ image_path }}" \ -{% endfor %} -{% else %} - -i "containers/.datalad/environments/{{container_name}}/image" \ -{% endif %} {% if datalad_expand_inputs %} --expand inputs \ {% endif %} + --output "{{ bids_app_output_dir }}" \ + -m "{{ container_name }} {% raw %}${subid}{% endraw %}{% if processing_level == 'session' %} {% raw %}${sesid}{% endraw %}{% endif %}" \ + -- \ + {{ bids_app_input_dir }} \ + {{ bids_app_output_dir }} \ + participant \ +{% for bids_app_arg in bids_app_args %} +{% if bids_app_arg %} + {{ bids_app_arg }} \ +{% endif %} +{% endfor %} + {{ subject_selection_flag }} "{% raw %}${subid}{% endraw %}" + +# Step 2: Zip outputs +datalad run \ --explicit \ {% if zip_foldernames is not none %} {% for key, value in zip_foldernames.items() %} -o "{% raw %}${subid}{% endraw %}{% if processing_level == 'session' %}_{% raw %}${sesid}{% endraw %}{% endif %}_{{ key }}-{{ value }}.zip" \ {% endfor %} {% endif %} - -m "{{ (datalad_run_message if datalad_run_message is defined and datalad_run_message else container_name) }} {% raw %}${subid}{% endraw %}{% if processing_level == 'session' %} {% raw %}${sesid}{% endraw %}{% endif %}" \ - "bash ./{{ run_script_relpath if run_script_relpath else 'code/' + container_name + '_zip.sh' }} {% raw %}${subid}{% endraw %} {% if processing_level == 'session' %} {% raw %}${sesid}{% endraw %}{% endif %}{% for input_dataset in input_datasets %}{% if input_dataset['is_zipped'] %} ${%raw%}{{%endraw%}{{ input_dataset['name'].upper() }}_ZIP{%raw%}}{%endraw%}{%endif%}{%endfor%}" + -m "Zip {% raw %}${subid}{% endraw %}{% if processing_level == 'session' %} {% raw %}${sesid}{% endraw %}{% endif %}" \ + -- \ + bash ./code/{{ container_name }}_zip.sh "{% raw %}${subid}{% endraw %}"{% if processing_level == 'session' %} "{% raw %}${sesid}{% endraw %}"{% endif %} + +# Step 3: Remove raw outputs (datalad run --explicit doesn't track deletions) +git rm -rf {{ bids_app_output_dir }} +git commit -m "Remove raw outputs for {% raw %}${subid}{% endraw %}{% if processing_level == 'session' %} {% raw %}${sesid}{% endraw %}{% endif %} (zipped)" # Finish up: # push result file content to output RIA storage: From bc6ab49fcbad464638ae62323f0f60013ba627d9 Mon Sep 17 00:00:00 2001 From: Austin Macdonald Date: Mon, 16 Mar 2026 16:04:16 -0500 Subject: [PATCH 06/12] Generate zip-only script instead of singularity+zip MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The zip script no longer runs the container or removes outputs — both are now handled by the job template (containers-run and git rm). Co-Authored-By: Claude Opus 4.6 (1M context) --- babs/bootstrap.py | 11 +++++------ babs/container.py | 23 +++++++++++++++++++++++ babs/templates/zip_outputs.sh.jinja2 | 9 +++++++++ 3 files changed, 37 insertions(+), 6 deletions(-) create mode 100644 babs/templates/zip_outputs.sh.jinja2 diff --git a/babs/bootstrap.py b/babs/bootstrap.py index 5cba7472..9b116537 100644 --- a/babs/bootstrap.py +++ b/babs/bootstrap.py @@ -428,15 +428,14 @@ def _bootstrap_single_app_scripts( container_image_path=container_image_path) # Generate `_zip.sh`: ---------------------------------- - # which is a bash script of singularity run + zip - # in folder: `analysis/code` - print('\nGenerating a bash script for running container and zipping the outputs...') - print('This bash script will be named as `' + container_name + '_zip.sh`') + # Zip-only script (container execution is now handled by containers-run + # in participant_job.sh) + print('\nGenerating zip script: ' + container_name + '_zip.sh') bash_path = op.join(self.analysis_path, 'code', container_name + '_zip.sh') - container.generate_bash_run_bidsapp(bash_path, self.input_datasets, self.processing_level) + container.generate_bash_zip_outputs(bash_path, self.processing_level) self.datalad_save( path='code/' + container_name + '_zip.sh', - message='Generate script of running container', + message='Generate zip script', ) # make another folder within `code` for test jobs: diff --git a/babs/container.py b/babs/container.py index d5e56664..0c5d6b22 100644 --- a/babs/container.py +++ b/babs/container.py @@ -9,6 +9,7 @@ from babs.generate_bidsapp_runscript import ( bids_app_args_from_config, generate_bidsapp_runscript, + get_output_zipping_cmds, ) from babs.generate_submit_script import generate_submit_script, generate_test_submit_script from babs.utils import app_output_settings_from_config @@ -103,6 +104,28 @@ def sanity_check(self, analysis_path): + "'." ) + def generate_bash_zip_outputs(self, bash_path, processing_level): + """Generate a bash script that only zips BIDS App outputs.""" + dict_zip_foldernames, _ = app_output_settings_from_config(self.config) + cmd_zip = get_output_zipping_cmds(dict_zip_foldernames, processing_level) + + env = Environment( + loader=PackageLoader('babs', 'templates'), + trim_blocks=True, + lstrip_blocks=True, + autoescape=False, + undefined=StrictUndefined, + ) + template = env.get_template('zip_outputs.sh.jinja2') + script_content = template.render( + processing_level=processing_level, + cmd_zip=cmd_zip, + ) + + with open(bash_path, 'w') as f: + f.write(script_content) + os.chmod(bash_path, 0o700) + def generate_bash_run_bidsapp(self, bash_path, input_ds, processing_level): """ This is to generate a bash script that runs the BIDS App singularity image. diff --git a/babs/templates/zip_outputs.sh.jinja2 b/babs/templates/zip_outputs.sh.jinja2 new file mode 100644 index 00000000..b6e59c29 --- /dev/null +++ b/babs/templates/zip_outputs.sh.jinja2 @@ -0,0 +1,9 @@ +#!/bin/bash +set -e -u -x + +subid="$1" +{% if processing_level == 'session' %} +sesid="$2" +{% endif %} + +{{ cmd_zip }} From e6afd81e7bcb1f1d69c3fa0c7f9136e9804dfbb8 Mon Sep 17 00:00:00 2001 From: Austin Macdonald Date: Mon, 16 Mar 2026 16:36:14 -0500 Subject: [PATCH 07/12] Include .datalad in sparse-checkout for containers-run The job clone uses --no-checkout + sparse-checkout. Cone mode does not include dotfile directories by default, so .datalad/config (where containers-add registers the container) was missing. This caused "No known containers" errors at job runtime. Co-Authored-By: Claude Opus 4.6 (1M context) --- babs/templates/participant_job.sh.jinja2 | 1 + 1 file changed, 1 insertion(+) diff --git a/babs/templates/participant_job.sh.jinja2 b/babs/templates/participant_job.sh.jinja2 index 8a134d0a..052c7713 100644 --- a/babs/templates/participant_job.sh.jinja2 +++ b/babs/templates/participant_job.sh.jinja2 @@ -70,6 +70,7 @@ if ! git sparse-checkout init --cone; then fi git sparse-checkout set \ + .datalad \ code \ containers \ {% for input_dataset in input_datasets %} From 29d63f16fa9ce5a51e0bf51046e009c7d25f4ae3 Mon Sep 17 00:00:00 2001 From: Austin Macdonald Date: Mon, 16 Mar 2026 16:55:16 -0500 Subject: [PATCH 08/12] Use git rm --sparse for output cleanup Output dirs created by containers-run are outside the sparse-checkout cone, so git rm refuses to touch them without --sparse. Co-Authored-By: Claude Opus 4.6 (1M context) --- babs/templates/participant_job.sh.jinja2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/babs/templates/participant_job.sh.jinja2 b/babs/templates/participant_job.sh.jinja2 index 052c7713..3ed23125 100644 --- a/babs/templates/participant_job.sh.jinja2 +++ b/babs/templates/participant_job.sh.jinja2 @@ -166,7 +166,7 @@ datalad run \ bash ./code/{{ container_name }}_zip.sh "{% raw %}${subid}{% endraw %}"{% if processing_level == 'session' %} "{% raw %}${sesid}{% endraw %}"{% endif %} # Step 3: Remove raw outputs (datalad run --explicit doesn't track deletions) -git rm -rf {{ bids_app_output_dir }} +git rm -rf --sparse {{ bids_app_output_dir }} git commit -m "Remove raw outputs for {% raw %}${subid}{% endraw %}{% if processing_level == 'session' %} {% raw %}${sesid}{% endraw %}{% endif %} (zipped)" # Finish up: From 295d4c826f53dacda3a26f252ec41f6037798b33 Mon Sep 17 00:00:00 2001 From: Austin Macdonald Date: Mon, 16 Mar 2026 17:34:50 -0500 Subject: [PATCH 09/12] Fix writable check for NFS/ACL filesystems os.access() is unreliable on NFS and ACL-based filesystems. Use tempfile.TemporaryFile() to actually test write access. Co-Authored-By: Claude Opus 4.6 (1M context) --- babs/bootstrap.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/babs/bootstrap.py b/babs/bootstrap.py index 9b116537..13f1ae8f 100644 --- a/babs/bootstrap.py +++ b/babs/bootstrap.py @@ -3,6 +3,7 @@ import os import os.path as op import subprocess +import tempfile from pathlib import Path import datalad.api as dlapi @@ -77,8 +78,11 @@ def babs_bootstrap( f"The parent folder '{parent_dir}' does not exist! `babs init` won't proceed." ) - # check if parent directory is writable: - if not os.access(parent_dir, os.W_OK): + # check if parent directory is writable (os.access unreliable on NFS/ACL): + try: + with tempfile.TemporaryFile(dir=parent_dir): + pass + except OSError: raise ValueError( f"The parent folder '{parent_dir}' is not writable! `babs init` won't proceed." ) From 0e50464e389c23bfeae7602e330ac33944a79bef Mon Sep 17 00:00:00 2001 From: Austin Macdonald Date: Mon, 16 Mar 2026 17:45:08 -0500 Subject: [PATCH 10/12] Fetch container image during init for job symlink access The PROJECT_ROOT symlink in job clones points to the analysis dataset's container image. Without fetching the actual content during init, it's just an annex pointer and singularity can't open it. Co-Authored-By: Claude Opus 4.6 (1M context) --- babs/bootstrap.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/babs/bootstrap.py b/babs/bootstrap.py index 13f1ae8f..57f2ace6 100644 --- a/babs/bootstrap.py +++ b/babs/bootstrap.py @@ -257,6 +257,14 @@ def babs_bootstrap( call_fmt=call_fmt, ) + # Fetch container image so the PROJECT_ROOT symlink at job time + # resolves to actual content, not a dangling annex pointer: + print('\nFetching container image...') + dlapi.get( + dataset=self.analysis_path, + path=op.join(self.analysis_path, container_image_path), + ) + container = Container(container_ds, container_name, container_config, container_image_path=container_image_path) container.sanity_check(self.analysis_path) From 5e34541628e12e05d259ce3c0fb558cdba2a4b2d Mon Sep 17 00:00:00 2001 From: Austin Macdonald Date: Mon, 16 Mar 2026 18:51:35 -0500 Subject: [PATCH 11/12] Add design doc for containers-run support Co-Authored-By: Claude Opus 4.6 (1M context) --- design/containers-run.md | 211 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 211 insertions(+) create mode 100644 design/containers-run.md diff --git a/design/containers-run.md b/design/containers-run.md new file mode 100644 index 00000000..f3cdc545 --- /dev/null +++ b/design/containers-run.md @@ -0,0 +1,211 @@ +# Design: `datalad containers-run` support in BABS + +Issue: https://github.com/PennLINC/babs/issues/328 + +## Goal + +Use `datalad containers-run` instead of direct `singularity run` for BIDS app +execution. This provides: + +- **Dynamic container path discovery** — `datalad containers-list` reads the + path from the container dataset's `.datalad/config`, so BABS works with any + container dataset layout (not just the hardcoded + `containers/.datalad/environments/{name}/image`). +- **Provenance tracking** — datalad records exactly which container ran and how. +- **Compatibility with repronim/containers** — which uses + `images/bids/{name}.sif` instead of the default datalad-containers layout. + +## How it works + +### During `babs init` + +1. Clone container dataset as subdataset of analysis (unchanged) +2. `datalad containers-list` discovers the image path in the container dataset +3. `datalad containers-add` registers the container at the analysis level with: + - The discovered image path (relative to analysis) + - A `call_fmt` built from the user's `singularity_args` config +4. `datalad get` fetches the container image content so it's available for job + symlinks + +### During job execution (`participant_job.sh`) + +Three-step flow replaces the single `datalad run` that called a combined +singularity+zip script: + +1. **`datalad containers-run`** — runs the BIDS app. Container is accessed via + a PROJECT_ROOT symlink to the analysis dataset's fetched image. +2. **`datalad run`** — calls a zip-only script (no longer contains singularity + invocation or output removal). +3. **`git rm --sparse`** — removes raw outputs. Needed because `datalad run + --explicit` doesn't track file deletions. The `--sparse` flag is required + because outputs are outside the sparse-checkout cone. + +## Prior work + +An initial proof-of-concept was developed before upstream's sparse-checkout PR +(#337) landed: https://github.com/asmacdo/babs/pull/6 + +That POC was validated end-to-end on a SLURM cluster (Discovery) with both +handmade container datasets and repronim/containers. Key findings from that +work: + +- **Concurrent `datalad get` failures** — when multiple jobs run simultaneously, + they all try to fetch the same container image. Git-annex uses transfer locks, + causing all but one job to fail. This is a known git-annex bug: + https://git-annex.branchable.com/bugs/concurrent_get_from_separate_clones_fails/ + The POC solved this with ephemeral clones for the containers subdataset. + +- **Ephemeral clones work for containers but not inputs** — container images are + resolved on the host before singularity launches, so annex symlinks outside + `$PWD` are fine. Input data needs to be accessible inside the container with + `--containall -B $PWD`, so symlinks outside `$PWD` break. + +- **`datalad run --explicit` doesn't track deletions** — discovered and + documented as an upstream issue. The workaround (separate `git rm` step) is + carried forward into this implementation. + +The current implementation was recreated from scratch on top of the +sparse-checkout changes, using the POC as reference. The PROJECT_ROOT symlink +approach (from upstream) combined with `datalad get` of the image during init +avoids the concurrent get problem entirely — jobs never call `datalad get` for +the container image, they just follow the symlink to the pre-fetched content. +Ephemeral clones remain a potential future alternative. + +## Problems encountered + +### `git sparse-checkout` interaction + +The upstream sparse-checkout PR (#337) changed job clones to use +`--no-checkout` + `git sparse-checkout`. This caused two issues: + +- **`.datalad/config` not checked out** — cone mode doesn't include + dot-directories by default. Fixed by adding `.datalad` to the sparse-checkout + set. Without it, `containers-run` fails with "No known containers." + +- **`git rm` blocked by sparse-checkout** — outputs created by `containers-run` + are outside the cone, so `git rm` refuses to touch them. Fixed with + `git rm --sparse`. + +### Container image access in job clones + +Job clones don't have the container image content — they clone from input RIA +with `--no-checkout`. The existing PROJECT_ROOT symlink approach works: symlink +from the job clone's container path to the analysis dataset's copy. But this +requires the analysis dataset to have the actual content (not just an annex +pointer), so `datalad get` during init is necessary. + +### `--containall` requires `--pwd $PWD` and `-B $PWD` + +When users specify `--containall` in `singularity_args`, singularity disables +auto-binding of `$PWD` and sets the working directory to `$HOME` inside the +container. This breaks relative paths to inputs/outputs. The `call_fmt` built +during init always includes `-B $PWD --pwd $PWD` to handle this. + +### datalad `{placeholder}` conflicts with shell `${variables}` + +`datalad run` interprets `{name}` as a substitution placeholder, which +conflicts with shell variable expansion like `${subid}`. The POC initially +tried inlining zip commands in `datalad run` but this caused placeholder +conflicts. The solution is a separate zip script called by `datalad run`. + +In the jinja2 templates, shell variables use `{%raw%}/${subid}{%endraw%}` +blocks to prevent jinja2 from interpreting them, and datalad sees the literal +`${subid}` which it passes through to bash. + +### `datalad run --explicit` doesn't track deletions + +When outputs are deleted inside a `datalad run --explicit` command, datalad +doesn't record the deletion — only additions/modifications are tracked. This is +why raw output removal is a separate `git rm` step outside of datalad run. + +This was fixed upstream in https://github.com/datalad/datalad/pull/7823 but +we keep the `git rm` workaround to avoid requiring the latest datalad version. + +## Open questions + +### Pipeline support + +Pipeline currently uses a separate code path (`_bootstrap_pipeline_scripts`) +that hardcodes container paths and generates a combined script +(`pipeline_zip.sh`) with direct `singularity run` calls per step. + +Questions: +- Should pipeline be "a pipeline of 1"? i.e., unify single-app and pipeline + into one code path where single-app is just a pipeline with one step. +- If not unified, pipeline needs the same `containers-list` / `containers-add` + treatment per container step. +- Pipeline currently puts all singularity invocations in one script called by + one `datalad run`. With `containers-run`, each step would be a separate + `datalad containers-run` call — better provenance but different structure. + +### Reusing `call_fmt` / `cmdexec` from container datasets + +Currently we build `call_fmt` from the user's `singularity_args` config: +``` +singularity run -B $PWD --pwd $PWD {user_args} {img} {cmd} +``` + +Some container datasets (like repronim/containers) already define `cmdexec` in +their `.datalad/config`. Should we reuse that instead of building our own? + +Related: datalad-container's freeze script approach +(https://github.com/datalad/datalad-container/issues/287) could provide a +standardized way to handle this. + +### Container image fetch during init + +`datalad get` of the container image during `babs init` ensures the symlink +approach works at job time. But this makes init slow if the image isn't already +local (e.g., pulling a multi-GB .sif from a remote). + +Options: +- Accept the cost during init (current approach) +- Advise users to `datalad get` the image in their source container dataset + before running `babs init`, so the fetch is a fast local copy +- Add a separate `babs prepare` step for data orchestration that runs after + init but before submit + +### Container access mechanism + +Currently using PROJECT_ROOT symlink (inherited from upstream). This depends on: +- `PROJECT_ROOT` environment variable propagating through SLURM +- Shared filesystem visible from compute nodes + +An alternative is ephemeral clones (`datalad get -n --reckless ephemeral +containers`), which removes these dependencies but requires changing the clone +source from input RIA to the analysis dataset path. Deferred for now. + +### Backwards-incompatible: features from `bidsapp_run.sh.jinja2` not yet ported + +The old `bidsapp_run.sh.jinja2` template had several BABS-managed features that +are not yet in the `containers-run` path. These are **backwards-incompatible** +for users who depend on them and need to be restored or replaced. + +- **TemplateFlow bind mount** — BABS read `TEMPLATEFLOW_HOME` from the + environment and added `-B path:path_in_container` + `--env` to the singularity + command. Could be restored in BABS, or users could handle this via + `singularity_args` in their config. + +- **FreeSurfer license bind mount** — BABS intercepted `--fs-license-file` from + `bids_app_args` and converted it to a `-B` bind mount. Same options: restore + in BABS or let users specify via `singularity_args`. + +- **BIDS filter file** — dynamically generated per-job for session-level + processing, restricting BIDS layout indexing to the current session. This is + BIDS-app-specific (fmriprep gets `bold`, qsiprep gets `dwi`, etc.) and can't + be handled by user config alone. Could potentially be supported via a hook or + user-provided script mechanism (nothing like this exists in BABS today). + +- **`${PWD}/` prefix on paths** — the old script passed absolute paths + (`${PWD}/inputs/data/BIDS`). The `containers-run` path passes relative paths, + which should work because `call_fmt` includes `--pwd $PWD`. Needs verification + with BIDS apps that resolve paths differently. + +### `--explicit` flag + +We use `--explicit` on both `containers-run` and the zip `datalad run`. The +POC required `--explicit` because the old subject pruning (`rm -rf` of other +subjects' directories) dirtied the dataset, and datalad refuses to run on a +dirty dataset without `--explicit`. Upstream's sparse-checkout PR (#337) +eliminated subject pruning, so the dataset should stay clean. `--explicit` may +no longer be strictly necessary — kept for safety, could test removing it. From 802dae996c21e938c4b84c76b04f1def98592c72 Mon Sep 17 00:00:00 2001 From: Austin Macdonald Date: Mon, 16 Mar 2026 19:22:18 -0500 Subject: [PATCH 12/12] fixup: ruff --- babs/bootstrap.py | 42 +++++++++++++++++++++++++++++++----------- babs/container.py | 3 +-- 2 files changed, 32 insertions(+), 13 deletions(-) diff --git a/babs/bootstrap.py b/babs/bootstrap.py index 57f2ace6..5d37749e 100644 --- a/babs/bootstrap.py +++ b/babs/bootstrap.py @@ -230,13 +230,14 @@ def babs_bootstrap( # Discover container image path from the container dataset: containers_path = op.join(self.analysis_path, 'containers') result = dlapi.containers_list(dataset=containers_path, result_renderer='disabled') - container_info = [r for r in result - if r['action'] == 'containers' and r['name'] == container_name] + container_info = [ + r for r in result if r['action'] == 'containers' and r['name'] == container_name + ] if not container_info: available = [r['name'] for r in result if r['action'] == 'containers'] raise ValueError( f"Container '{container_name}' not found in container dataset. " - f"Available: {available}" + f'Available: {available}' ) image_path_in_ds = op.relpath(container_info[0]['path'], containers_path) container_image_path = op.join('containers', image_path_in_ds) @@ -265,8 +266,12 @@ def babs_bootstrap( path=op.join(self.analysis_path, container_image_path), ) - container = Container(container_ds, container_name, container_config, - container_image_path=container_image_path) + container = Container( + container_ds, + container_name, + container_config, + container_image_path=container_image_path, + ) container.sanity_check(self.analysis_path) # ============================================================== @@ -290,11 +295,18 @@ def babs_bootstrap( container = containers[0] else: self._bootstrap_single_app_scripts( - container_ds, container_name, container_config, system, + container_ds, + container_name, + container_config, + system, + container_image_path=container_image_path, + ) + container = Container( + container_ds, + container_name, + container_config, container_image_path=container_image_path, ) - container = Container(container_ds, container_name, container_config, - container_image_path=container_image_path) # Copy in any other files needed: self._init_import_files(container.config.get('imported_files', [])) @@ -432,12 +444,20 @@ def babs_bootstrap( print('`babs init` was successful!') def _bootstrap_single_app_scripts( - self, container_ds, container_name, container_config, system, + self, + container_ds, + container_name, + container_config, + system, container_image_path=None, ): """Bootstrap scripts for single BIDS app configuration.""" - container = Container(container_ds, container_name, container_config, - container_image_path=container_image_path) + container = Container( + container_ds, + container_name, + container_config, + container_image_path=container_image_path, + ) # Generate `_zip.sh`: ---------------------------------- # Zip-only script (container execution is now handled by containers-run diff --git a/babs/container.py b/babs/container.py index 0c5d6b22..cc12ac72 100644 --- a/babs/container.py +++ b/babs/container.py @@ -18,8 +18,7 @@ class Container: """This class is for the BIDS App Container""" - def __init__(self, container_ds, container_name, config_yaml_file, - container_image_path=None): + def __init__(self, container_ds, container_name, config_yaml_file, container_image_path=None): """ This is to initialize Container class.