@@ -35,8 +35,70 @@ BRANCH="job-${%raw%}{{%endraw%}{{varname_jobid}}{%raw%}}{%endraw%}-${%raw%}{{%en
# Create the per-job working directory and enter it; the clone and the default
# overlay file path below are both relative to this directory.
mkdir "${BRANCH}"
cd "${BRANCH}"
3737
# Optional overlay mode for input-materialization-heavy operations.
# Recommended for HPC: site admins publish one helper image with
# datalad/git/git-annex/singularity available.
#
# Each setting may be overridden from the job environment; the value after
# ':-' is the default used when the variable is unset or empty.
#   BABS_USE_INPUT_OVERLAY     "1" enables overlay mode, anything else disables it
#   BABS_OVERLAY_SIZE_GB       size used when a new overlay file is created
#   BABS_OVERLAY_KEEP          "1" keeps the overlay file and job directory afterwards
#   BABS_OVERLAY_WORKDIR       directory in the helper container that receives the clone
#   BABS_OVERLAY_HELPER_IMAGE  helper image path; no default, required in overlay mode
#   BABS_INPUT_OVERLAY_PATH    overlay file path; defaults to the current directory
BABS_USE_INPUT_OVERLAY="${BABS_USE_INPUT_OVERLAY:-0}"
BABS_OVERLAY_SIZE_GB="${BABS_OVERLAY_SIZE_GB:-20}"
BABS_OVERLAY_KEEP="${BABS_OVERLAY_KEEP:-0}"
BABS_OVERLAY_WORKDIR="${BABS_OVERLAY_WORKDIR:-/home/babs-overlay-work}"
BABS_OVERLAY_HELPER_IMAGE="${BABS_OVERLAY_HELPER_IMAGE:-}"
BABS_INPUT_OVERLAY_PATH="${BABS_INPUT_OVERLAY_PATH:-${PWD}/input-materialization.ext3}"
47+
48+ if [ "${BABS_USE_INPUT_OVERLAY}" = "1" ]; then
49+ echo "# BABS input overlay mode is enabled."
50+ echo "# Helper image: ${BABS_OVERLAY_HELPER_IMAGE:-<unset >}"
51+ echo "# Overlay image: ${BABS_INPUT_OVERLAY_PATH}"
52+ echo "# Overlay size (GiB): ${BABS_OVERLAY_SIZE_GB}"
53+ echo "# Overlay workdir in helper: ${BABS_OVERLAY_WORKDIR}"
54+
# Overlay mode needs a helper image that this user can read; stop with
# build instructions (exit status 2) when it is unset or unreadable.
if [ -z "${BABS_OVERLAY_HELPER_IMAGE}" ] || [ ! -r "${BABS_OVERLAY_HELPER_IMAGE}" ]; then
    echo "ERROR: BABS_USE_INPUT_OVERLAY=1 but BABS_OVERLAY_HELPER_IMAGE is unset or unreadable." >&2
    echo "Build a helper image once from this repo using docker/babs-input-overlay-helper.def," >&2
    echo "then set BABS_OVERLAY_HELPER_IMAGE to that .sif path in your job environment." >&2
    exit 2
fi
61+
# The host needs singularity on PATH to start the helper container; stop
# with exit status 2 when it is missing.
if ! command -v singularity >/dev/null 2>&1; then
    echo "ERROR: singularity command not found, but overlay mode requires it." >&2
    exit 2
fi
66+
# Create and format the overlay file only when nothing exists at that path;
# an existing file is reused untouched.
if [ ! -e "${BABS_INPUT_OVERLAY_PATH}" ]; then
    # Reject anything that is not a plain non-negative integer before it
    # reaches truncate as a size argument.
    case "${BABS_OVERLAY_SIZE_GB}" in
        '' | *[!0-9]*)
            echo "ERROR: BABS_OVERLAY_SIZE_GB must be a whole number of GiB, got '${BABS_OVERLAY_SIZE_GB}'." >&2
            exit 2
            ;;
    esac

    echo "# Creating overlay file at ${BABS_INPUT_OVERLAY_PATH}"
    if ! truncate -s "${BABS_OVERLAY_SIZE_GB}G" "${BABS_INPUT_OVERLAY_PATH}"; then
        echo "ERROR: could not create overlay file ${BABS_INPUT_OVERLAY_PATH}." >&2
        # Do not leave a partial file behind: the existence check above
        # would treat it as a ready-made overlay on the next attempt.
        rm -f -- "${BABS_INPUT_OVERLAY_PATH}"
        exit 2
    fi

    # First try with -E root_owner; some mke2fs builds reject that extended
    # option, so fall back to a plain format before giving up.
    if ! mkfs.ext3 -F -t ext3 -m 0 -q -E root_owner "${BABS_INPUT_OVERLAY_PATH}"; then
        echo "# mkfs.ext3 with -E root_owner failed; retrying without it."
        if ! mkfs.ext3 -F -t ext3 -m 0 -q "${BABS_INPUT_OVERLAY_PATH}"; then
            echo "ERROR: could not format overlay file ${BABS_INPUT_OVERLAY_PATH}." >&2
            rm -f -- "${BABS_INPUT_OVERLAY_PATH}"
            exit 2
        fi
    fi
fi
75+
# Smoke-test the helper image with the overlay attached before running the
# real job in it. The probe names every missing tool on stderr, so the log
# distinguishes an incomplete image from singularity itself failing.
if ! singularity exec \
    --overlay "${BABS_INPUT_OVERLAY_PATH}" \
    "${BABS_OVERLAY_HELPER_IMAGE}" \
    bash -lc 'rc=0; for tool in datalad git git-annex singularity; do command -v "${tool}" >/dev/null 2>&1 || { echo "ERROR: ${tool} not found in helper image." >&2; rc=1; }; done; exit "${rc}"'; then
    echo "ERROR: helper image is missing required commands (datalad, git, git-annex, singularity)." >&2
    echo "If no missing command is listed above, singularity itself failed (for example, the overlay could not be attached)." >&2
    echo "Please ask your admins to build/update the site helper image and retry." >&2
    exit 2
fi
84+
# Run the job body inside the helper container with the overlay attached.
# The here-document delimiter is quoted, so this shell expands nothing in the
# body; every value the body reads is handed over explicitly through env.
#   DSLOCKFILE  read by the 'flock ... git push outputstore' step of the body.
#               NOTE(review): the lock file path and flock itself must also be
#               available inside the helper container - confirm.
#   SHELLOPTS   makes the inner bash start with the same shell options as this
#               script (for example errexit/nounset if they are enabled here),
#               so the body fails the same way the non-overlay branch would.
singularity exec \
    --overlay "${BABS_INPUT_OVERLAY_PATH}" \
    "${BABS_OVERLAY_HELPER_IMAGE}" \
    env \
    dssource="${dssource}" \
    pushgitremote="${pushgitremote}" \
    subid="${subid}" \
    BRANCH="${BRANCH}" \
    DSLOCKFILE="${DSLOCKFILE}" \
{% if processing_level == 'session' %}
    sesid="${sesid}" \
{% endif %}
    BABS_OVERLAY_WORKDIR="${BABS_OVERLAY_WORKDIR}" \
    SHELLOPTS="${SHELLOPTS:-}" \
    bash -s <<'BABS_OVERLAY_JOB_EOF'
# datalad clone the input ria:
echo '# Clone the data from input RIA:'
# These lines are read by the bash started inside the helper container, so
# every step is checked explicitly: a failed mkdir/cd must not let the clone
# land in whatever directory that shell happened to start in.
mkdir -p "${BABS_OVERLAY_WORKDIR}" || exit 1
cd "${BABS_OVERLAY_WORKDIR}" || exit 1
datalad clone "${dssource}" ds || exit 1
cd ds || exit 1
@@ -110,9 +172,97 @@ flock "${DSLOCKFILE}" git push outputstore
# Delete: drop the dataset content recursively, skipping the availability and
# modification safety checks.
datalad drop -r . --reckless availability --reckless modification

git annex dead here
# The delimiter below ends the here-document; it must stand alone at the very
# start of its line because the here-document was opened with << (not <<-).
BABS_OVERLAY_JOB_EOF
# Exit status of the containerized job that ended on the previous line; it is
# captured first so that no later command overwrites $?.
babs_overlay_job_status=$?

# Remove the overlay file unless the user asked to keep it for debugging.
if [ "${BABS_OVERLAY_KEEP}" = "1" ]; then
    echo "# Keeping overlay file for debugging: ${BABS_INPUT_OVERLAY_PATH}"
else
    rm -f -- "${BABS_INPUT_OVERLAY_PATH}"
fi

# Do not fall through to the final SUCCESS message after a failed job.
if [ "${babs_overlay_job_status}" -ne 0 ]; then
    echo "ERROR: job inside the overlay helper container failed with exit status ${babs_overlay_job_status}." >&2
    exit "${babs_overlay_job_status}"
fi
182+ else
183+ # datalad clone the input ria:
184+ echo '# Clone the data from input RIA:'
185+ datalad clone "${dssource}" ds
186+ cd ds
187+
188+ # set up the result deposition:
189+ echo '# Register output RIA as remote for result deposition:'
190+ git remote add outputstore "${pushgitremote}"
191+
192+ # set up a new branch:
193+ echo "# Create a new branch for this job's results:"
194+ git checkout -b "${BRANCH}"
195+
# Start of the application-specific code: ------------------------------

# pull down input data (but don't retrieve the data content) and remove other sub's data:
# NOTE(review): 'grep -v "$subid"' keeps every path that merely contains the
# subject id as a substring (sub-01 also keeps sub-010) - confirm this is
# acceptable for the datasets in use.
echo "# Pull down the input subject (or dataset) but don't retrieve data contents:"
{% for input_dataset in input_datasets %}
{% if not input_dataset['is_zipped'] %}
datalad get -n "{{ input_dataset['path_in_babs'] }}/${subid}"
(cd {{ input_dataset['path_in_babs'] }} && find . -type d -name 'sub*' | grep -v "$subid" | xargs rm -rf)
{% if processing_level == 'session' %}
(cd {{ input_dataset['path_in_babs'] }}/"${subid}" && find . -type d -name 'ses*' | grep -v "$sesid" | xargs rm -rf)
{% endif %}
{% else %}
datalad get -n "{{ input_dataset['path_in_babs'] }}"
(cd {{ input_dataset['path_in_babs'] }} && find . -type f -name 'sub*.zip' | grep -v "$subid" | xargs rm -f)
{% endif %}
{% endfor %}

{{ zip_locator_text }}
214+
# datalad run:
# Inputs (-i) are the run script, the subject (and session) data or zip of
# every input dataset, and the container image(s); outputs (-o) are the result
# zip files. The '{% raw %}' pairs emit the literal braces of a shell
# ${NAME_ZIP} expansion, so no whitespace may appear inside those sequences.
datalad run \
    -i "{{ run_script_relpath if run_script_relpath else 'code/' + container_name + '_zip.sh' }}" \
{% for input_dataset in input_datasets %}
{% if not input_dataset['is_zipped'] %}
    -i "{{ input_dataset['unzipped_path_containing_subject_dirs'] }}/${subid}{% if processing_level == 'session' %}/${sesid}{% endif %}" \
    -i "{{ input_dataset['unzipped_path_containing_subject_dirs'] }}/*json" \
{% else %}
    -i "${% raw %}{{% endraw %}{{ input_dataset['name'].upper() }}_ZIP{% raw %}}{% endraw %}" \
{% endif %}
{% endfor %}
{% if container_images %}
{% for image_path in container_images %}
    -i "{{ image_path }}" \
{% endfor %}
{% elif not run_script_relpath %}
    -i "containers/.datalad/environments/{{container_name}}/image" \
{% endif %}
{% if datalad_expand_inputs %}
    --expand inputs \
{% endif %}
    --explicit \
{% if zip_foldernames is not none %}
{% for key, value in zip_foldernames.items() %}
    -o "${subid}{% if processing_level == 'session' %}_${sesid}{% endif %}_{{ key }}-{{ value }}.zip" \
{% endfor %}
{% endif %}
    -m "{{ datalad_run_message if datalad_run_message is defined else container_name }} ${subid}{% if processing_level == 'session' %} ${sesid}{% endif %}" \
    "bash ./{{ run_script_relpath if run_script_relpath else 'code/' + container_name + '_zip.sh' }} ${subid}{% if processing_level == 'session' %} ${sesid}{% endif %}{% for input_dataset in input_datasets %}{% if input_dataset['is_zipped'] %} ${% raw %}{{% endraw %}{{ input_dataset['name'].upper() }}_ZIP{% raw %}}{% endraw %}{% endif %}{% endfor %}"
244+
245+ # Finish up:
246+ # push result file content to output RIA storage:
247+ echo '# Push result file content to output RIA storage:'
248+ datalad push --to output-storage
249+
250+ # push the output branch:
251+ echo '# Push the branch with provenance records:'
252+ flock "${DSLOCKFILE}" git push outputstore
253+
254+ # Delete:
255+ datalad drop -r . --reckless availability --reckless modification
256+
257+ git annex dead here
258+ fi
113259
# cd out of $BRANCH:
# In overlay mode the clone and 'cd ds' happen inside the helper container, so
# this shell is still in ${BRANCH} and must go up one level only. In default
# mode this shell is in ${BRANCH}/ds and must go up two levels.
if [ "${BABS_USE_INPUT_OVERLAY}" = "1" ]; then
    cd ..
else
    cd ../..
fi

# Remove the job directory unless overlay debugging asked to keep it.
if [ "${BABS_USE_INPUT_OVERLAY}" = "1" ] && [ "${BABS_OVERLAY_KEEP}" = "1" ]; then
    echo "# Keeping ${BRANCH} because BABS_OVERLAY_KEEP=1."
else
    rm -rf -- "${BRANCH}"
fi

echo SUCCESS
0 commit comments