Skip to content

Commit 15a6d6f

Browse files
committed
Use overlayfs to fix inode issue
1 parent c56b73d commit 15a6d6f

File tree

5 files changed

+324
-1
lines changed

5 files changed

+324
-1
lines changed

babs/templates/participant_job.sh.jinja2

Lines changed: 151 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,70 @@ BRANCH="job-${%raw%}{{%endraw%}{{varname_jobid}}{%raw%}}{%endraw%}-${%raw%}{{%en
3535
mkdir "${BRANCH}"
3636
cd "${BRANCH}"
3737

38+
# Optional overlay mode for input materialization-heavy operations.
39+
# Recommended for HPC: site admins publish one helper image with
40+
# datalad/git/git-annex/singularity available.
41+
BABS_USE_INPUT_OVERLAY="${BABS_USE_INPUT_OVERLAY:-0}"
42+
BABS_OVERLAY_SIZE_GB="${BABS_OVERLAY_SIZE_GB:-20}"
43+
BABS_OVERLAY_KEEP="${BABS_OVERLAY_KEEP:-0}"
44+
BABS_OVERLAY_WORKDIR="${BABS_OVERLAY_WORKDIR:-/home/babs-overlay-work}"
45+
BABS_OVERLAY_HELPER_IMAGE="${BABS_OVERLAY_HELPER_IMAGE:-}"
46+
BABS_INPUT_OVERLAY_PATH="${BABS_INPUT_OVERLAY_PATH:-${PWD}/input-materialization.ext3}"
47+
48+
if [ "${BABS_USE_INPUT_OVERLAY}" = "1" ]; then
49+
echo "# BABS input overlay mode is enabled."
50+
echo "# Helper image: ${BABS_OVERLAY_HELPER_IMAGE:-<unset>}"
51+
echo "# Overlay image: ${BABS_INPUT_OVERLAY_PATH}"
52+
echo "# Overlay size (GiB): ${BABS_OVERLAY_SIZE_GB}"
53+
echo "# Overlay workdir in helper: ${BABS_OVERLAY_WORKDIR}"
54+
55+
if [ -z "${BABS_OVERLAY_HELPER_IMAGE}" ] || [ ! -r "${BABS_OVERLAY_HELPER_IMAGE}" ]; then
56+
echo "ERROR: BABS_USE_INPUT_OVERLAY=1 but BABS_OVERLAY_HELPER_IMAGE is unset or unreadable." >&2
57+
echo "Build a helper image once from this repo using docker/babs-input-overlay-helper.def," >&2
58+
echo "then set BABS_OVERLAY_HELPER_IMAGE to that .sif path in your job environment." >&2
59+
exit 2
60+
fi
61+
62+
if ! command -v singularity >/dev/null 2>&1; then
63+
echo "ERROR: singularity command not found, but overlay mode requires it." >&2
64+
exit 2
65+
fi
66+
67+
if [ ! -e "${BABS_INPUT_OVERLAY_PATH}" ]; then
68+
echo "# Creating overlay file at ${BABS_INPUT_OVERLAY_PATH}"
69+
truncate -s "${BABS_OVERLAY_SIZE_GB}g" "${BABS_INPUT_OVERLAY_PATH}"
70+
if ! mkfs.ext3 -F -t ext -m 0 -q -E root_owner "${BABS_INPUT_OVERLAY_PATH}"; then
71+
echo "# mkfs.ext3 with -E root_owner failed; retrying without it."
72+
mkfs.ext3 -F -t ext -m 0 -q "${BABS_INPUT_OVERLAY_PATH}"
73+
fi
74+
fi
75+
76+
if ! singularity exec \
77+
--overlay "${BABS_INPUT_OVERLAY_PATH}" \
78+
"${BABS_OVERLAY_HELPER_IMAGE}" \
79+
bash -lc "command -v datalad >/dev/null 2>&1 && command -v git >/dev/null 2>&1 && command -v git-annex >/dev/null 2>&1 && command -v singularity >/dev/null 2>&1"; then
80+
echo "ERROR: helper image is missing required commands (datalad, git, git-annex, singularity)." >&2
81+
echo "Please ask your admins to build/update the site helper image and retry." >&2
82+
exit 2
83+
fi
84+
85+
singularity exec \
86+
--overlay "${BABS_INPUT_OVERLAY_PATH}" \
87+
"${BABS_OVERLAY_HELPER_IMAGE}" \
88+
env \
89+
dssource="${dssource}" \
90+
pushgitremote="${pushgitremote}" \
91+
subid="${subid}" \
92+
BRANCH="${BRANCH}" \
93+
{% if processing_level == 'session' %}
94+
sesid="${sesid}" \
95+
{% endif %}
96+
BABS_OVERLAY_WORKDIR="${BABS_OVERLAY_WORKDIR}" \
97+
bash -s <<'BABS_OVERLAY_JOB_EOF'
3898
# datalad clone the input ria:
3999
echo '# Clone the data from input RIA:'
100+
mkdir -p "${BABS_OVERLAY_WORKDIR}"
101+
cd "${BABS_OVERLAY_WORKDIR}"
40102
datalad clone "${dssource}" ds
41103
cd ds
42104

@@ -110,9 +172,97 @@ flock "${DSLOCKFILE}" git push outputstore
110172
datalad drop -r . --reckless availability --reckless modification
111173

112174
git annex dead here
175+
BABS_OVERLAY_JOB_EOF
176+
177+
if [ "${BABS_OVERLAY_KEEP}" = "1" ]; then
178+
echo "# Keeping overlay file for debugging: ${BABS_INPUT_OVERLAY_PATH}"
179+
else
180+
rm -f "${BABS_INPUT_OVERLAY_PATH}"
181+
fi
182+
else
183+
# datalad clone the input ria:
184+
echo '# Clone the data from input RIA:'
185+
datalad clone "${dssource}" ds
186+
cd ds
187+
188+
# set up the result deposition:
189+
echo '# Register output RIA as remote for result deposition:'
190+
git remote add outputstore "${pushgitremote}"
191+
192+
# set up a new branch:
193+
echo "# Create a new branch for this job's results:"
194+
git checkout -b "${BRANCH}"
195+
196+
# Start of the application-specific code: ------------------------------
197+
198+
# pull down input data (but don't retrieve the data content) and remove other sub's data:
199+
echo "# Pull down the input subject (or dataset) but don't retrieve data contents:"
200+
{% for input_dataset in input_datasets %}
201+
{% if not input_dataset['is_zipped'] %}
202+
datalad get -n "{{ input_dataset['path_in_babs'] }}/${subid}"
203+
(cd {{ input_dataset['path_in_babs'] }} && find . -type d -name 'sub*' | grep -v "$subid" | xargs rm -rf)
204+
{% if processing_level == 'session' %}
205+
(cd {{ input_dataset['path_in_babs'] }}/"${subid}" && find . -type d -name 'ses*' | grep -v "$sesid" | xargs rm -rf)
206+
{% endif %}
207+
{% else %}
208+
datalad get -n "{{ input_dataset['path_in_babs'] }}"
209+
(cd {{ input_dataset['path_in_babs'] }} && find . -type f -name 'sub*.zip' | grep -v "$subid" | xargs rm -f)
210+
{% endif %}
211+
{% endfor %}
212+
213+
{{ zip_locator_text }}
214+
215+
# datalad run:
216+
datalad run \
217+
-i "{{ run_script_relpath if run_script_relpath else 'code/' + container_name + '_zip.sh' }}" \
218+
{% for input_dataset in input_datasets %}
219+
{% if not input_dataset['is_zipped'] %}
220+
-i "{{ input_dataset['unzipped_path_containing_subject_dirs'] }}/${subid}{% if processing_level == 'session' %}/${sesid}{% endif %}" \
221+
-i "{{ input_dataset['unzipped_path_containing_subject_dirs'] }}/*json" \
222+
{% else %}
223+
-i "${%raw%}{{%endraw%}{{ input_dataset['name'].upper() }}_ZIP{%raw%}}{%endraw%}" \
224+
{% endif %}
225+
{% endfor %}
226+
{% if container_images %}
227+
{% for image_path in container_images %}
228+
-i "{{ image_path }}" \
229+
{% endfor %}
230+
{% elif not run_script_relpath %}
231+
-i "containers/.datalad/environments/{{container_name}}/image" \
232+
{% endif %}
233+
{% if datalad_expand_inputs %}
234+
--expand inputs \
235+
{% endif %}
236+
--explicit \
237+
{% if zip_foldernames is not none %}
238+
{% for key, value in zip_foldernames.items() %}
239+
-o "${subid}{% if processing_level == 'session' %}_${sesid}{% endif %}_{{ key }}-{{ value }}.zip" \
240+
{% endfor %}
241+
{% endif %}
242+
-m "{{ datalad_run_message if datalad_run_message is defined else container_name }} ${subid}{% if processing_level == 'session' %} ${sesid}{% endif %}" \
243+
"bash ./{{ run_script_relpath if run_script_relpath else 'code/' + container_name + '_zip.sh' }} ${subid} {% if processing_level == 'session' %} ${sesid}{% endif %}{% for input_dataset in input_datasets %}{% if input_dataset['is_zipped'] %} ${%raw%}{{%endraw%}{{ input_dataset['name'].upper() }}_ZIP{%raw%}}{%endraw%}{%endif%}{%endfor%}"
244+
245+
# Finish up:
246+
# push result file content to output RIA storage:
247+
echo '# Push result file content to output RIA storage:'
248+
datalad push --to output-storage
249+
250+
# push the output branch:
251+
echo '# Push the branch with provenance records:'
252+
flock "${DSLOCKFILE}" git push outputstore
253+
254+
# Delete:
255+
datalad drop -r . --reckless availability --reckless modification
256+
257+
git annex dead here
258+
fi
113259

114260
# cd out of $BRANCH:
115261
cd ../..
116-
rm -rf "${BRANCH}"
262+
if [ "${BABS_USE_INPUT_OVERLAY}" = "1" ] && [ "${BABS_OVERLAY_KEEP}" = "1" ]; then
263+
echo "# Keeping ${BRANCH} because BABS_OVERLAY_KEEP=1."
264+
else
265+
rm -rf "${BRANCH}"
266+
fi
117267

118268
echo SUCCESS
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
Bootstrap: docker
From: ubuntu:22.04

%labels
    Author babs
    Description BABS helper image for overlay-mode DataLad operations

%post
    # Build-time provisioning: install the tools that the BABS participant job
    # runs inside the overlay (datalad, git, git-annex and a container runtime).
    set -e
    export DEBIAN_FRONTEND=noninteractive
    apt-get update
    apt-get install -y --no-install-recommends \
        bash \
        ca-certificates \
        coreutils \
        datalad \
        git \
        git-annex \
        gnupg \
        lsb-release \
        python3 \
        python3-pip \
        sed \
        util-linux

    # Try to provide Apptainer/Singularity in the helper image.
    # We normalize on "singularity" because participant_job.sh calls singularity.
    if apt-cache show apptainer >/dev/null 2>&1; then
        apt-get install -y --no-install-recommends apptainer
        ln -sf /usr/bin/apptainer /usr/local/bin/singularity
    elif apt-cache show singularity-container >/dev/null 2>&1; then
        apt-get install -y --no-install-recommends singularity-container
    else
        # Fail the build here instead of producing an image that the
        # participant job would reject later for lacking `singularity`.
        echo "ERROR: neither 'apptainer' nor 'singularity-container' is available from the configured apt sources." >&2
        echo "Add a package source that provides one of them and rebuild the helper image." >&2
        exit 1
    fi

    apt-get clean
    rm -rf /var/lib/apt/lists/*

%environment
    export LC_ALL=C
    export LANG=C

%test
    # Mirror the commands that the participant job checks for in the helper image.
    set -e
    command -v bash
    command -v datalad
    command -v git
    command -v git-annex
    command -v singularity

docs/preparation_config_yaml_file.rst

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -771,6 +771,77 @@ Notes:
771771
* The "path where intermediate results should be stored" (e.g., ``-w``) is directly used by BIDS Apps.
772772
It is also a sub-folder of the space specified in this section.
773773

774+
.. _input-overlay-mode:
775+
776+
Input materialization overlay mode (optional, advanced)
777+
=======================================================
778+
779+
For large studies on HPC, ``datalad clone`` / ``datalad get`` can overwhelm
780+
shared filesystems (many tiny file creates/stats/deletes across many concurrent jobs).
781+
BABS supports an optional overlay-based participant job mode to isolate these operations.
782+
783+
When enabled, participant jobs run DataLad-heavy steps through::
784+
785+
singularity exec --overlay <ext3_overlay.img> <helper_image.sif> <command>
786+
787+
The helper image is a **runtime image for BABS job plumbing** (not your BIDS App image).
788+
It must contain at least:
789+
790+
* ``datalad``
791+
* ``git``
792+
* ``git-annex``
793+
* ``singularity`` (or a compatible runtime, such as Apptainer, exposed under the ``singularity`` command name)
794+
* basic shell/coreutils
795+
796+
Build your helper image once from this repository
797+
-------------------------------------------------
798+
799+
BABS includes an Apptainer definition file for this helper image at:
800+
801+
* ``docker/babs-input-overlay-helper.def``
802+
803+
Build it once (from the root of this repo). Use ``--test`` so the definition's
804+
``%test`` section runs and verifies required commands (bash, datalad, git,
805+
git-annex, singularity/apptainer)::
806+
807+
cd /path/to/babs
808+
apptainer build --test "${HOME}/apptainer/babs-overlay-helper.sif" docker/babs-input-overlay-helper.def
809+
810+
Then point ``BABS_OVERLAY_HELPER_IMAGE`` to this image path in your jobs.
811+
812+
.. note::
813+
814+
BABS does **not** build this helper image automatically during jobs.
815+
Build it once before running overlay-mode jobs.
816+
817+
User-side job configuration
818+
---------------------------
819+
820+
Set these variables in ``script_preamble`` (or your scheduler environment)::
821+
822+
script_preamble: |
823+
source "${CONDA_PREFIX}"/bin/activate babs
824+
export BABS_USE_INPUT_OVERLAY=1
825+
export BABS_OVERLAY_HELPER_IMAGE="${HOME}/apptainer/babs-overlay-helper.sif"
826+
export BABS_OVERLAY_SIZE_GB=20
827+
# Optional: keep ext3 overlay + branch dir for debugging
828+
# export BABS_OVERLAY_KEEP=1
829+
830+
Variable meanings:
831+
832+
* ``BABS_USE_INPUT_OVERLAY``: enable (``1``) / disable (``0``) overlay mode.
833+
* ``BABS_OVERLAY_HELPER_IMAGE``: absolute path to helper image (required when enabled).
834+
* ``BABS_OVERLAY_SIZE_GB``: ext3 overlay size in GiB (default: ``20``).
835+
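* ``BABS_OVERLAY_KEEP``: keep overlay and branch directory for debugging (default: ``0``).
* ``BABS_OVERLAY_WORKDIR``: directory inside the helper container where the input dataset is cloned (default: ``/home/babs-overlay-work``).
* ``BABS_INPUT_OVERLAY_PATH``: path of the ext3 overlay image file (default: ``${PWD}/input-materialization.ext3``, i.e. inside the job's branch directory).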
836+
837+
Expected runtime messages
838+
-------------------------
839+
840+
When enabled, jobs print the helper image path, overlay path, overlay size, and overlay workdir at startup.
841+
If the helper image is missing or unreadable, jobs fail fast with a message instructing users
842+
to build the helper image once from ``docker/babs-input-overlay-helper.def`` and set
843+
``BABS_OVERLAY_HELPER_IMAGE`` correctly.
844+
774845
.. _required_files:
775846

776847

docs/walkthrough.rst

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,24 @@ There are several lines (highlighted above) that require customization based on
308308
309309
* For more, please see: :ref:`script-preamble`.
310310

311+
* Optional (recommended for very large job arrays on shared filesystems):
312+
enable input materialization overlay mode.
313+
This routes DataLad-heavy steps through an ext3 overlay image (created per job by default) and reduces
314+
metadata pressure on shared storage.
315+
Example::
316+
317+
script_preamble: |
318+
source "${CONDA_PREFIX}"/bin/activate babs
319+
module load singularity
320+
export BABS_USE_INPUT_OVERLAY=1
321+
export BABS_OVERLAY_HELPER_IMAGE="${HOME}/apptainer/babs-overlay-helper.sif"
322+
export BABS_OVERLAY_SIZE_GB=20
323+
324+
The helper image above should be built once from this repo (definition file:
325+
``docker/babs-input-overlay-helper.def``) and should include
326+
``datalad``, ``git``, ``git-annex``, and ``singularity``.
327+
See :ref:`input-overlay-mode` for full details.
328+
311329
* Section ``input_datasets``:
312330
* Describe the inputs to the BIDS App here.
313331
* Specify the original location of the data.

tests/test_generate_submit_script.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,3 +172,39 @@ def test_generate_submit_script_pipeline(tmp_path):
172172
if not passed:
173173
print(script_content)
174174
assert passed, status
175+
176+
177+
def test_generate_submit_script_contains_overlay_guidance(tmp_path):
    """Check that the rendered script carries the overlay hooks and helper-image hint.

    The script is generated from the ``eg_aslprep-0-7-5.yaml`` example config,
    searched for the overlay-related snippets, then written to ``tmp_path`` and
    passed through ``run_shellcheck``.
    """
    config = read_yaml(NOTEBOOKS_DIR / 'eg_aslprep-0-7-5.yaml')

    script_content = generate_submit_script(
        queue_system='slurm',
        cluster_resources_config=config['cluster_resources'],
        script_preamble=config['script_preamble'],
        job_scratch_directory=config['job_compute_space'],
        input_datasets=input_datasets_prep,
        processing_level='subject',
        container_name='aslprep-0-7-5',
        zip_foldernames=config['zip_foldernames'],
    )

    # Every snippet below must appear verbatim in the rendered script.
    expected_snippets = (
        'BABS_USE_INPUT_OVERLAY="${BABS_USE_INPUT_OVERLAY:-0}"',
        'BABS_OVERLAY_HELPER_IMAGE="${BABS_OVERLAY_HELPER_IMAGE:-}"',
        'BABS_OVERLAY_KEEP="${BABS_OVERLAY_KEEP:-0}"',
        'singularity exec \\',
        '--overlay "${BABS_INPUT_OVERLAY_PATH}"',
        'Build a helper image once from this repo using docker/babs-input-overlay-helper.def',
        'BABS_OVERLAY_HELPER_IMAGE',
    )
    for snippet in expected_snippets:
        assert snippet in script_content

    # Persist the script so the checker can read it from disk.
    script_path = tmp_path / 'participant_job_overlay.sh'
    script_path.write_text(script_content)

    passed, status = run_shellcheck(str(script_path))
    if not passed:
        # Dump the script to make the checker's report easier to follow.
        print(script_content)
    assert passed, status

0 commit comments

Comments
 (0)