@@ -32,12 +32,24 @@ BRANCH="job-${%raw%}{{%endraw%}{{varname_jobid}}{%raw%}}{%endraw%}-${%raw%}{{%en
3232BRANCH="job-${% raw %}{{% endraw %}{{varname_jobid}}{% raw %}}{% endraw %}-${% raw %}{{% endraw %}{{varname_taskid}}{% raw %}}{% endraw %}-${subid}"
3333{% endif %}
3434
35+ cleanup() {
36+ set +e
37+ if [ -d "{{ job_scratch_directory }}/{% raw %}${BRANCH}{% endraw %}/ds" ]; then
38+ cd "{{ job_scratch_directory }}/{% raw %}${BRANCH}{% endraw %}/ds" 2>/dev/null || true
39+ datalad drop -r . --reckless availability --reckless modification >/dev/null 2>&1 || true
40+ git annex dead here >/dev/null 2>&1 || true
41+ fi
42+ cd "{{ job_scratch_directory }}" 2>/dev/null || true
43+ rm -rf "{{ job_scratch_directory }}/{% raw %}${BRANCH}{% endraw %}" >/dev/null 2>&1 || true
44+ }
45+ trap cleanup EXIT
46+
3547mkdir "${BRANCH}"
3648cd "${BRANCH}"
3749
3850# datalad clone the input ria:
3951echo '# Clone the data from input RIA:'
40- datalad clone "${dssource}" ds
52+ datalad clone "${dssource}" ds -- --no-checkout
4153cd ds
4254
4355# set up the result deposition:
@@ -48,32 +60,69 @@ git remote add outputstore "${pushgitremote}"
4860echo "# Create a new branch for this job's results:"
4961git checkout -b "${BRANCH}"
5062
63+ # always use sparse-checkout, print error when not available
64+ if ! git sparse-checkout init --cone; then
65+ echo "ERROR: git sparse-checkout is not available (or failed to initialize) on this system." 1>&2
66+ exit 1
67+ fi
68+
69+ git sparse-checkout set \
70+ code \
71+ containers \
72+ {% for input_dataset in input_datasets %}
73+ {{ input_dataset['path_in_babs'] }}{% if not loop.last %} \
74+ {% endif %}
75+ {% endfor %}
76+
77+ git checkout -f
78+
5179# Start of the application-specific code: ------------------------------
5280
53- # pull down input data (but don't retrieve the data content) and remove other sub's data :
54- echo "# Pull down the input subject (or dataset) but don't retrieve data contents:"
81+ # pull down only needed session path and explicit dataset-level metadata :
82+ echo "# Pull down the input session but don't retrieve data contents:"
5583{% for input_dataset in input_datasets %}
5684{% if not input_dataset ['is_zipped' ] %}
57- datalad get -n "{{ input_dataset['path_in_babs'] }}/${subid}"
58- (cd {{ input_dataset['path_in_babs'] }} && find . -type d -name 'sub*' | grep -v "$subid" | xargs rm -rf)
59- {% if processing_level == 'session' %}
60- (cd {{ input_dataset['path_in_babs'] }}/"${subid}" && find . -type d -name 'ses*' | grep -v "$sesid" | xargs rm -rf)
61- {% endif %}
85+ datalad get -n "{{ input_dataset['path_in_babs'] }}/{% raw %}${subid}{% endraw %}{% if processing_level == 'session' %}/{% raw %}${sesid}{% endraw %}{% endif %}"
86+
87+ datalad get -n \
88+ "{{ input_dataset['path_in_babs'] }}/dataset_description.json" \
89+ "{{ input_dataset['path_in_babs'] }}/participants.tsv" \
90+ "{{ input_dataset['path_in_babs'] }}/participants.json" || true
6291{% else %}
6392datalad get -n "{{ input_dataset['path_in_babs'] }}"
64- (cd {{ input_dataset['path_in_babs'] }} && find . -type f -name 'sub*.zip' | grep -v "$subid" | xargs rm -f)
6593{% endif %}
6694{% endfor %}
6795
6896{{ zip_locator_text }}
6997
98+ # Link to shared container image so each job does not re-clone the same image
99+ PROJECT_ROOT="${PROJECT_ROOT:?ERROR: PROJECT_ROOT env var must be set to {{ project_root | default('{project_root}') }}}"
100+ CONTAINER_SHARED="${PROJECT_ROOT}/analysis/containers/.datalad/environments/{{ container_name }}/image"
101+ CONTAINER_JOB="containers/.datalad/environments/{{ container_name }}/image"
102+
103+ if [ ! -e "${CONTAINER_SHARED}" ]; then
104+ echo "ERROR: shared container image not found at ${CONTAINER_SHARED}" 1>&2
105+ exit 1
106+ fi
107+
108+ mkdir -p "containers/.datalad/environments/{{ container_name }}"
109+ # Replace any existing path (e.g. sparse-checkout placeholder or annex pointer) with symlink to shared image
110+ ln -sf "${CONTAINER_SHARED}" "${CONTAINER_JOB}"
111+
112+ if [ ! -e "${CONTAINER_JOB}" ]; then
113+ echo "ERROR: failed to create symlink ${CONTAINER_JOB}" 1>&2
114+ exit 1
115+ fi
116+
70117# datalad run:
71118datalad run \
72119 -i "{{ run_script_relpath if run_script_relpath else 'code/' + container_name + '_zip.sh' }}" \
73120{% for input_dataset in input_datasets %}
74121{% if not input_dataset ['is_zipped' ] %}
75- -i "{{ input_dataset['unzipped_path_containing_subject_dirs'] }}/${subid}{% if processing_level == 'session' %} /${sesid}{% endif %} " \
76- -i "{{ input_dataset['unzipped_path_containing_subject_dirs'] }}/*json" \
122+ -i "{{ input_dataset['unzipped_path_containing_subject_dirs'] }}/{% raw %}${subid}{% endraw %}{% if processing_level == 'session' %}/{% raw %}${sesid}{% endraw %}{% endif %}" \
123+ -i "{{ input_dataset['path_in_babs'] }}/dataset_description.json" \
124+ -i "{{ input_dataset['path_in_babs'] }}/participants.tsv" \
125+ -i "{{ input_dataset['path_in_babs'] }}/participants.json" \
77126{% else %}
78127 -i "${% raw %}{{% endraw %}{{ input_dataset['name'].upper() }}_ZIP{% raw %}}{% endraw %}" \
79128{% endif %}
@@ -82,7 +131,7 @@ datalad run \
82131{% for image_path in container_images %}
83132 -i "{{ image_path }}" \
84133{% endfor %}
85- {% elif not run_script_relpath %}
134+ {% else %}
86135 -i "containers/.datalad/environments/{{container_name}}/image" \
87136{% endif %}
88137{% if datalad_expand_inputs %}
@@ -91,11 +140,11 @@ datalad run \
91140 --explicit \
92141{% if zip_foldernames is not none %}
93142{% for key , value in zip_foldernames .items () %}
94- -o "${subid}{% if processing_level == 'session' %} _${sesid}{% endif %} _{{ key }}-{{ value }}.zip" \
143+ -o "{% raw %}${subid}{% endraw %}{% if processing_level == 'session' %}_{% raw %}${sesid}{% endraw %}{% endif %}_{{ key }}-{{ value }}.zip" \
95144{% endfor %}
96145{% endif %}
97- -m "{{ datalad_run_message if datalad_run_message is defined else container_name }} ${subid}{% if processing_level == 'session' %} ${sesid}{% endif %} " \
98- "bash ./{{ run_script_relpath if run_script_relpath else 'code/' + container_name + '_zip.sh' }} ${subid} {% if processing_level == 'session' %} ${sesid}{% endif %}{% for input_dataset in input_datasets %}{% if input_dataset ['is_zipped' ] %} ${% raw %} {{%endraw%}{{ input_dataset['name'] .upper() }}_ZIP{% raw %} }{% endraw %}{% endif %}{% endfor %} "
146+ -m "{{ (datalad_run_message if datalad_run_message is defined and datalad_run_message else container_name) }} {% raw %}${subid}{% endraw %}{% if processing_level == 'session' %} {% raw %}${sesid}{% endraw %}{% endif %}" \
147+ "bash ./{{ run_script_relpath if run_script_relpath else 'code/' + container_name + '_zip.sh' }} {% raw %}${subid}{% endraw %}{% if processing_level == 'session' %} {% raw %}${sesid}{% endraw %}{% endif %}{% for input_dataset in input_datasets %}{% if input_dataset['is_zipped'] %} ${% raw %}{{% endraw %}{{ input_dataset['name'].upper() }}_ZIP{% raw %}}{% endraw %}{% endif %}{% endfor %}"
99148
100149# Finish up:
101150# push result file content to output RIA storage:
@@ -106,13 +155,4 @@ datalad push --to output-storage
106155echo '# Push the branch with provenance records:'
107156flock "${DSLOCKFILE}" git push outputstore
108157
109- # Delete:
110- datalad drop -r . --reckless availability --reckless modification
111-
112- git annex dead here
113-
114- # cd out of $BRANCH:
115- cd ../..
116- rm -rf "${BRANCH}"
117-
118- echo SUCCESS
158+ echo SUCCESS