diff --git a/babs/templates/participant_job.sh.jinja2 b/babs/templates/participant_job.sh.jinja2
index 6e505a12..34dd656f 100644
--- a/babs/templates/participant_job.sh.jinja2
+++ b/babs/templates/participant_job.sh.jinja2
@@ -35,8 +35,73 @@ BRANCH="job-${%raw%}{{%endraw%}{{varname_jobid}}{%raw%}}{%endraw%}-${%raw%}{{%en
 mkdir "${BRANCH}"
 cd "${BRANCH}"
 
+# Optional overlay mode for input materialization-heavy operations.
+# Recommended for HPC: site admins publish one helper image with
+# datalad/git/git-annex/singularity available.
+BABS_USE_INPUT_OVERLAY="${BABS_USE_INPUT_OVERLAY:-0}"
+BABS_OVERLAY_SIZE_GB="${BABS_OVERLAY_SIZE_GB:-20}"
+BABS_OVERLAY_KEEP="${BABS_OVERLAY_KEEP:-0}"
+BABS_OVERLAY_WORKDIR="${BABS_OVERLAY_WORKDIR:-/home/babs-overlay-work}"
+BABS_OVERLAY_HELPER_IMAGE="${BABS_OVERLAY_HELPER_IMAGE:-}"
+BABS_INPUT_OVERLAY_PATH="${BABS_INPUT_OVERLAY_PATH:-${PWD}/input-materialization.ext3}"
+
+if [ "${BABS_USE_INPUT_OVERLAY}" = "1" ]; then
+    echo "# BABS input overlay mode is enabled."
+    echo "# Helper image: ${BABS_OVERLAY_HELPER_IMAGE:-}"
+    echo "# Overlay image: ${BABS_INPUT_OVERLAY_PATH}"
+    echo "# Overlay size (GiB): ${BABS_OVERLAY_SIZE_GB}"
+    echo "# Overlay workdir in helper: ${BABS_OVERLAY_WORKDIR}"
+
+    if [ -z "${BABS_OVERLAY_HELPER_IMAGE}" ] || [ ! -r "${BABS_OVERLAY_HELPER_IMAGE}" ]; then
+        echo "ERROR: BABS_USE_INPUT_OVERLAY=1 but BABS_OVERLAY_HELPER_IMAGE is unset or unreadable." >&2
+        echo "Build a helper image once from this repo using docker/babs-input-overlay-helper.def," >&2
+        echo "then set BABS_OVERLAY_HELPER_IMAGE to that .sif path in your job environment." >&2
+        exit 2
+    fi
+
+    if ! command -v singularity >/dev/null 2>&1; then
+        echo "ERROR: singularity command not found, but overlay mode requires it." >&2
+        exit 2
+    fi
+
+    if [ ! -e "${BABS_INPUT_OVERLAY_PATH}" ]; then
+        echo "# Creating overlay file at ${BABS_INPUT_OVERLAY_PATH}"
+        truncate -s "${BABS_OVERLAY_SIZE_GB}g" "${BABS_INPUT_OVERLAY_PATH}"
+        if ! mkfs.ext3 -F -t ext3 -m 0 -q -E root_owner "${BABS_INPUT_OVERLAY_PATH}"; then
+            echo "# mkfs.ext3 with -E root_owner failed; retrying without it."
+            mkfs.ext3 -F -t ext3 -m 0 -q "${BABS_INPUT_OVERLAY_PATH}"
+        fi
+    fi
+
+    if ! singularity exec \
+        --overlay "${BABS_INPUT_OVERLAY_PATH}" \
+        "${BABS_OVERLAY_HELPER_IMAGE}" \
+        bash -lc "command -v datalad >/dev/null 2>&1 && command -v git >/dev/null 2>&1 && command -v git-annex >/dev/null 2>&1 && command -v singularity >/dev/null 2>&1"; then
+        echo "ERROR: helper image is missing required commands (datalad, git, git-annex, singularity)." >&2
+        echo "Please ask your admins to build/update the site helper image and retry." >&2
+        exit 2
+    fi
+
+    # Run the job body (here-doc below) inside the helper image. Variables the
+    # body reads are passed explicitly via env, including the push lock file.
+    singularity exec \
+        --overlay "${BABS_INPUT_OVERLAY_PATH}" \
+        "${BABS_OVERLAY_HELPER_IMAGE}" \
+        env \
+        dssource="${dssource}" \
+        pushgitremote="${pushgitremote}" \
+        subid="${subid}" \
+        BRANCH="${BRANCH}" \
+        DSLOCKFILE="${DSLOCKFILE:-}" \
+{% if processing_level == 'session' %}
+        sesid="${sesid}" \
+{% endif %}
+        BABS_OVERLAY_WORKDIR="${BABS_OVERLAY_WORKDIR}" \
+        bash -s <<'BABS_OVERLAY_JOB_EOF'
 # datalad clone the input ria:
 echo '# Clone the data from input RIA:'
+mkdir -p "${BABS_OVERLAY_WORKDIR}"
+cd "${BABS_OVERLAY_WORKDIR}"
 datalad clone "${dssource}" ds
 cd ds
 
@@ -110,9 +175,103 @@ flock "${DSLOCKFILE}" git push outputstore
 datalad drop -r . --reckless availability --reckless modification
 
 git annex dead here
+BABS_OVERLAY_JOB_EOF
+
+    if [ "${BABS_OVERLAY_KEEP}" = "1" ]; then
+        echo "# Keeping overlay file for debugging: ${BABS_INPUT_OVERLAY_PATH}"
+    else
+        rm -f "${BABS_INPUT_OVERLAY_PATH}"
+    fi
+else
+    # datalad clone the input ria:
+    echo '# Clone the data from input RIA:'
+    datalad clone "${dssource}" ds
+    cd ds
+
+    # set up the result deposition:
+    echo '# Register output RIA as remote for result deposition:'
+    git remote add outputstore "${pushgitremote}"
+
+    # set up a new branch:
+    echo "# Create a new branch for this job's results:"
+    git checkout -b "${BRANCH}"
+
+    # Start of the application-specific code: ------------------------------
+
+    # pull down input data (but don't retrieve the data content) and remove other sub's data:
+    echo "# Pull down the input subject (or dataset) but don't retrieve data contents:"
+{% for input_dataset in input_datasets %}
+{% if not input_dataset['is_zipped'] %}
+    datalad get -n "{{ input_dataset['path_in_babs'] }}/${subid}"
+    (cd {{ input_dataset['path_in_babs'] }} && find . -type d -name 'sub*' | grep -v "$subid" | xargs rm -rf)
+{% if processing_level == 'session' %}
+    (cd {{ input_dataset['path_in_babs'] }}/"${subid}" && find . -type d -name 'ses*' | grep -v "$sesid" | xargs rm -rf)
+{% endif %}
+{% else %}
+    datalad get -n "{{ input_dataset['path_in_babs'] }}"
+    (cd {{ input_dataset['path_in_babs'] }} && find . -type f -name 'sub*.zip' | grep -v "$subid" | xargs rm -f)
+{% endif %}
+{% endfor %}
+
+{{ zip_locator_text }}
+
+    # datalad run:
+    datalad run \
+        -i "{{ run_script_relpath if run_script_relpath else 'code/' + container_name + '_zip.sh' }}" \
+{% for input_dataset in input_datasets %}
+{% if not input_dataset['is_zipped'] %}
+        -i "{{ input_dataset['unzipped_path_containing_subject_dirs'] }}/${subid}{% if processing_level == 'session' %}/${sesid}{% endif %}" \
+        -i "{{ input_dataset['unzipped_path_containing_subject_dirs'] }}/*json" \
+{% else %}
+        -i "${%raw%}{{%endraw%}{{ input_dataset['name'].upper() }}_ZIP{%raw%}}{%endraw%}" \
+{% endif %}
+{% endfor %}
+{% if container_images %}
+{% for image_path in container_images %}
+        -i "{{ image_path }}" \
+{% endfor %}
+{% elif not run_script_relpath %}
+        -i "containers/.datalad/environments/{{container_name}}/image" \
+{% endif %}
+{% if datalad_expand_inputs %}
+        --expand inputs \
+{% endif %}
+        --explicit \
+{% if zip_foldernames is not none %}
+{% for key, value in zip_foldernames.items() %}
+        -o "${subid}{% if processing_level == 'session' %}_${sesid}{% endif %}_{{ key }}-{{ value }}.zip" \
+{% endfor %}
+{% endif %}
+        -m "{{ datalad_run_message if datalad_run_message is defined else container_name }} ${subid}{% if processing_level == 'session' %} ${sesid}{% endif %}" \
+        "bash ./{{ run_script_relpath if run_script_relpath else 'code/' + container_name + '_zip.sh' }} ${subid} {% if processing_level == 'session' %} ${sesid}{% endif %}{% for input_dataset in input_datasets %}{% if input_dataset['is_zipped'] %} ${%raw%}{{%endraw%}{{ input_dataset['name'].upper() }}_ZIP{%raw%}}{%endraw%}{%endif%}{%endfor%}"
+
+    # Finish up:
+    # push result file content to output RIA storage:
+    echo '# Push result file content to output RIA storage:'
+    datalad push --to output-storage
+
+    # push the output branch:
+    echo '# Push the branch with provenance records:'
+    flock "${DSLOCKFILE}" git push outputstore
+
+    # Delete:
+    datalad drop -r . --reckless availability --reckless modification
+
+    git annex dead here
+fi
 
 # cd out of $BRANCH:
-cd ../..
-rm -rf "${BRANCH}"
+# In overlay mode the clone lives inside the container, so the host shell is
+# still in ${BRANCH}; in the default mode it is one level deeper, in ${BRANCH}/ds.
+if [ "${BABS_USE_INPUT_OVERLAY}" = "1" ]; then
+    cd ..
+else
+    cd ../..
+fi
+if [ "${BABS_USE_INPUT_OVERLAY}" = "1" ] && [ "${BABS_OVERLAY_KEEP}" = "1" ]; then
+    echo "# Keeping ${BRANCH} because BABS_OVERLAY_KEEP=1."
+else
+    rm -rf "${BRANCH}"
+fi
 
 echo SUCCESS
\ No newline at end of file
diff --git a/docker/babs-input-overlay-helper.def b/docker/babs-input-overlay-helper.def
new file mode 100644
index 00000000..b65165ac
--- /dev/null
+++ b/docker/babs-input-overlay-helper.def
@@ -0,0 +1,59 @@
+Bootstrap: docker
+From: ubuntu:22.04
+
+%labels
+    Author babs
+    Description BABS helper image for overlay-mode DataLad operations
+
+%post
+    set -e
+    export DEBIAN_FRONTEND=noninteractive
+    apt-get update
+    apt-get install -y --no-install-recommends \
+        bash \
+        ca-certificates \
+        coreutils \
+        datalad \
+        git \
+        git-annex \
+        gnupg \
+        lsb-release \
+        python3 \
+        python3-pip \
+        sed \
+        util-linux
+
+    # Try to provide Apptainer/Singularity in the helper image. We normalize on
+    # "singularity" because participant_job.sh calls singularity.
+    # 1) Default repos (e.g. Neurodebian, or distro that ships apptainer).
+    if apt-cache show apptainer >/dev/null 2>&1; then
+        apt-get install -y --no-install-recommends apptainer && \
+            ln -sf /usr/bin/apptainer /usr/local/bin/singularity || true
+    elif apt-cache show singularity-container >/dev/null 2>&1; then
+        apt-get install -y --no-install-recommends singularity-container || true
+    fi
+    # 2) Fallback: install from official Apptainer PPA when not in default repos.
+    if ! command -v singularity >/dev/null 2>&1 && ! command -v apptainer >/dev/null 2>&1; then
+        ( set +e
+          apt-get install -y --no-install-recommends software-properties-common
+          add-apt-repository -y ppa:apptainer/ppa
+          apt-get update
+          apt-get install -y --no-install-recommends apptainer
+          ln -sf /usr/bin/apptainer /usr/local/bin/singularity
+        ) || true
+    fi
+
+    apt-get clean
+    rm -rf /var/lib/apt/lists/*
+
+%environment
+    export LC_ALL=C
+    export LANG=C
+
+%test
+    set -e
+    command -v bash
+    command -v datalad
+    command -v git
+    command -v git-annex
+    command -v singularity || command -v apptainer
diff --git a/docs/preparation_config_yaml_file.rst b/docs/preparation_config_yaml_file.rst
index 767a95ac..e410f245 100644
--- a/docs/preparation_config_yaml_file.rst
+++ b/docs/preparation_config_yaml_file.rst
@@ -771,6 +771,81 @@ Notes:
 * The "path where intermediate results should be stored" (e.g., ``-w``) is directly used by BIDS Apps.
   It is also a sub-folder of the space specified in this section.
 
+.. _input-overlay-mode:
+
+Input materialization overlay mode (optional, advanced)
+=======================================================
+
+For large studies on HPC, ``datalad clone`` / ``datalad get`` can overwhelm
+shared filesystems (many tiny file creates/stats/deletes across many concurrent jobs).
+BABS supports an optional overlay-based participant job mode to isolate these operations.
+
+When enabled, participant jobs run DataLad-heavy steps through::
+
+    singularity exec --overlay <overlay.ext3> <helper-image.sif> ...
+
+The helper image is a **runtime image for BABS job plumbing** (not your BIDS App image).
+It must contain at least:
+
+* ``datalad``
+* ``git``
+* ``git-annex``
+* ``singularity`` (or a compatible ``singularity`` command in the helper image)
+* basic shell/coreutils
+
+Build your helper image once from this repository
+-------------------------------------------------
+
+BABS includes an Apptainer definition file for this helper image at:
+
+* ``docker/babs-input-overlay-helper.def``
+
+Build it once (from the root of this repo). The definition's ``%test`` section
+runs by default after build. It runs inside the image and checks that required
+executables exist (``command -v`` for bash, datalad, git, git-annex, and
+singularity or apptainer); the build fails if any are missing. Do not use
+``--notest`` if you want that check::
+
+    cd /path/to/babs
+    apptainer build "${HOME}/apptainer/babs-overlay-helper.sif" docker/babs-input-overlay-helper.def
+
+Then point ``BABS_OVERLAY_HELPER_IMAGE`` to this image path in your jobs.
+
+.. note::
+
+    BABS does **not** build this helper image automatically during jobs.
+    Build it once before running overlay-mode jobs.
+
+User-side job configuration
+---------------------------
+
+Set these variables in ``script_preamble`` (or your scheduler environment)::
+
+    script_preamble: |
+        source "${CONDA_PREFIX}"/bin/activate babs
+        export BABS_USE_INPUT_OVERLAY=1
+        export BABS_OVERLAY_HELPER_IMAGE="${HOME}/apptainer/babs-overlay-helper.sif"
+        export BABS_OVERLAY_SIZE_GB=20
+        # Optional: keep ext3 overlay + branch dir for debugging
+        # export BABS_OVERLAY_KEEP=1
+
+Variable meanings:
+
+* ``BABS_USE_INPUT_OVERLAY``: enable (``1``) / disable (``0``) overlay mode.
+* ``BABS_OVERLAY_HELPER_IMAGE``: absolute path to helper image (required when enabled).
+* ``BABS_OVERLAY_SIZE_GB``: ext3 overlay size in GiB (default: ``20``).
+* ``BABS_OVERLAY_KEEP``: keep overlay and branch directory for debugging (default: ``0``).
+* ``BABS_OVERLAY_WORKDIR``: directory inside the helper container where the dataset is cloned (default: ``/home/babs-overlay-work``).
+* ``BABS_INPUT_OVERLAY_PATH``: path of the ext3 overlay file (default: ``input-materialization.ext3`` in the job's branch directory).
+
+Expected runtime messages
+-------------------------
+
+When enabled, jobs print helper image path, overlay path, overlay size, and overlay workdir at startup.
+If helper image is missing/unreadable, jobs fail fast with a message instructing users
+to build the helper image once from ``docker/babs-input-overlay-helper.def`` and set
+``BABS_OVERLAY_HELPER_IMAGE`` correctly.
+
 
 .. _required_files:
 
diff --git a/docs/walkthrough.rst b/docs/walkthrough.rst
index c67c3e10..02960a3f 100644
--- a/docs/walkthrough.rst
+++ b/docs/walkthrough.rst
@@ -308,6 +308,24 @@ There are several lines (highlighted above) that require customization based on
 
     * For more, please see: :ref:`script-preamble`.
 
+    * Optional (recommended for very large job arrays on shared filesystems):
+      enable input materialization overlay mode.
+      This routes DataLad-heavy steps through a persistent ext3 overlay and reduces
+      metadata pressure on shared storage.
+      Example::
+
+          script_preamble: |
+              source "${CONDA_PREFIX}"/bin/activate babs
+              module load singularity
+              export BABS_USE_INPUT_OVERLAY=1
+              export BABS_OVERLAY_HELPER_IMAGE="${HOME}/apptainer/babs-overlay-helper.sif"
+              export BABS_OVERLAY_SIZE_GB=20
+
+      The helper image above should be built once from this repo (definition file:
+      ``docker/babs-input-overlay-helper.def``) and should include
+      ``datalad``, ``git``, ``git-annex``, and ``singularity``.
+      See :ref:`input-overlay-mode` for full details.
+
 * Section ``input_datasets``:
     * Describe the inputs to the BIDS App here.
     * Specify the original location of the data.
diff --git a/tests/test_generate_submit_script.py b/tests/test_generate_submit_script.py
index 6d0868a3..8cad3365 100644
--- a/tests/test_generate_submit_script.py
+++ b/tests/test_generate_submit_script.py
@@ -172,3 +172,41 @@ def test_generate_submit_script_pipeline(tmp_path):
     if not passed:
         print(script_content)
     assert passed, status
+
+
+def test_generate_submit_script_contains_overlay_guidance(tmp_path):
+    """Ensure overlay mode hooks and helper-image guidance are rendered."""
+    config_path = NOTEBOOKS_DIR / 'eg_aslprep-0-7-5.yaml'
+    config = read_yaml(config_path)
+
+    script_content = generate_submit_script(
+        queue_system='slurm',
+        cluster_resources_config=config['cluster_resources'],
+        script_preamble=config['script_preamble'],
+        job_scratch_directory=config['job_compute_space'],
+        input_datasets=input_datasets_prep,
+        processing_level='subject',
+        container_name='aslprep-0-7-5',
+        zip_foldernames=config['zip_foldernames'],
+    )
+
+    assert 'BABS_USE_INPUT_OVERLAY="${BABS_USE_INPUT_OVERLAY:-0}"' in script_content
+    assert 'BABS_OVERLAY_HELPER_IMAGE="${BABS_OVERLAY_HELPER_IMAGE:-}"' in script_content
+    assert 'BABS_OVERLAY_KEEP="${BABS_OVERLAY_KEEP:-0}"' in script_content
+    assert 'singularity exec \\' in script_content
+    assert '--overlay "${BABS_INPUT_OVERLAY_PATH}"' in script_content
+    assert (
+        'Build a helper image once from this repo using docker/babs-input-overlay-helper.def'
+        in script_content
+    )
+    # The in-container job body is a here-doc: it must be opened and then
+    # terminated by the delimiter alone at the start of a line.
+    assert "bash -s <<'BABS_OVERLAY_JOB_EOF'" in script_content
+    assert '\nBABS_OVERLAY_JOB_EOF\n' in script_content
+
+    out_fn = tmp_path / 'participant_job_overlay.sh'
+    with open(out_fn, 'w') as f:
+        f.write(script_content)
+    passed, status = run_shellcheck(str(out_fn))
+    if not passed:
+        print(script_content)
+    assert passed, status