diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..2d4d82c
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,223 @@
+# Generated by Neurodocker and Reproenv.
+
+FROM docker.io/gentoo/portage:20240324 AS portage
+FROM docker.io/gentoo/stage3:20240318
+
+COPY --from=portage /var/db/repos/gentoo /var/db/repos/gentoo
+
+ARG gentoo_hash=0e9370b45a589867220384ca6c63bc6bcaec3f74
+ARG science_hash=5307342730267714f7019d62f77b2d9bf7624d8c
+ARG FEATURES="-ipc-sandbox -network-sandbox -pid-sandbox"
+
+# Instead of
+#   COPY gentoo-portage/ /etc/portage/
+# produced by https://gist.github.com/yarikoptic/5da985d200fa1a2185a702ce9913d4d4
+# with further HEAVY tune up to make it work
+# Lessons:
+#  - needed to use echo -e so we could have new lines
+#  - \n at the beginning since otherwise # lines are ignored as comments
+#  - set -e makes the build stop at the first failing command even though the
+#    commands below are chained with ';'
+# NOTES/QUESTIONS:
+#  - hardcoded --jobs and --load-average -- I wonder if some generic way
+#  - no -march=native in COMMON_FLAGS: it would tie the compiled binaries to
+#    the CPU of the build host
+RUN set -e; \
+    mkdir -p /etc/portage/; \
+    echo -e "\
+\nCOMMON_FLAGS=\"-O2 -pipe\" \
+\nMAKEOPTS=\"--jobs 8 --load-average 9\" \
+\nCFLAGS=\"\${COMMON_FLAGS}\" \
+\nCXXFLAGS=\"\${COMMON_FLAGS}\" \
+\nFCFLAGS=\"\${COMMON_FLAGS}\" \
+\nFFLAGS=\"\${COMMON_FLAGS}\" \
+\nLC_MESSAGES=C \
+\nUSE=\"\${USE} science\" \
+\nACCEPT_LICENSE=\"*\" \
+" > "/etc/portage/make.conf"; \
+mkdir -p "/etc/portage/package.accept_keywords"; \
+echo -e "*/* ~amd64" > "/etc/portage/package.accept_keywords/gen" ; \
+mkdir -p "/etc/portage/package.mask"; \
+touch "/etc/portage/package.mask/bugs"; \
+mkdir -p "/etc/portage/repos.conf" ; \
+echo -e "[gentoo] \
+\nlocation = /var/db/repos/gentoo \
+\nsync-type = git \
+\nsync-uri = https://anongit.gentoo.org/git/repo/gentoo.git \
+\nsync-git-verify-commit-signature = yes" > "/etc/portage/repos.conf/gentoo"; \
+echo -e "[science] \
+\nlocation = /var/db/repos/science \
+\nsync-type = git \
+\nsync-uri = https://anongit.gentoo.org/git/proj/sci.git \
+\npriority = 7777" > "/etc/portage/repos.conf/science"
+
+# REPO_URL is expanded unquoted below: the sync-uri lines written above end in
+# a blank, which word splitting drops before the URL reaches git.
+RUN emerge -v --noreplace dev-vcs/git \
+    && emerge -v1u portage \
+    && mkdir /outputs \
+    && rm /var/db/repos/gentoo -rf \
+    && git config --global init.defaultBranch master \
+    && \
+    set -x && export GIT_TRACE=1 && \
+    REPO_URL=$(grep "^sync-uri" /etc/portage/repos.conf/gentoo | sed -e "s/sync-uri *= *//g") && \
+    git clone --depth 1 ${REPO_URL} /var/db/repos/gentoo && \
+    cd /var/db/repos/gentoo && \
+    git fetch --depth 1 origin $gentoo_hash && \
+    git reset --hard $gentoo_hash && \
+    rm .git -rf && \
+    REPO_URL=$(grep "^sync-uri" /etc/portage/repos.conf/science | sed -e "s/sync-uri *= *//g") && \
+    git clone --depth 1 ${REPO_URL} /var/db/repos/science && \
+    cd /var/db/repos/science && \
+    git fetch --depth 1 origin $science_hash && \
+    git reset --hard $science_hash && \
+    rm .git -rf
+    # Old Christian: Remove sync-uri to not accidentally re-sync if we work with the package management interactively
+    # Christian from the future: Maybe we want the option to re-sync if we're debugging it interactively...
+    #RUN sed -i /etc/portage/repos.conf/{gentoo,science} -e "s/sync-type *= *git/sync-type =/g"
+    #RUN sed -i /etc/portage/repos.conf/{gentoo,science} -e "/sync-uri/d"
+    #RUN sed -i /etc/portage/repos.conf/{gentoo,science} -e "/sync-git-verify-commit-signature/d"
+    # Make sure all CPU flags supported by the hardware are whitelisted
+    # This only affects packages with handwritten assembly language optimizations, e.g. ffmpeg.
+    # Removing it is safe, software will just not take full advantage of processor capabilities.
+    #RUN emerge cpuid2cpuflags
+    #RUN echo "*/* $(cpuid2cpuflags)" > /etc/portage/package.use/00cpu-flags
+    ### Emerge cool stuff here
+    ### Autounmask-continue enables all features on dependencies which the top level packages require
+    ### By default this needs user confirmation which would interrupt the build.
+RUN emerge --autounmask-continue \
+    afni \
+    fsl \
+    && rm -rf /var/tmp/portage/*
+COPY ["environment.yml", \
+      "/opt/environment.yml"]
+COPY ["src", \
+      "/opt/dsst-defacing-pipeline"]
+# The dsstdeface env's bin directory precedes conda's base bin on PATH so that
+# programs resolved through PATH at run time (e.g. the entrypoint's "env python")
+# come from the environment created from environment.yml when it provides them.
+ENV CONDA_DIR="/opt/miniconda-latest" \
+    AFNI_ATLAS_PATH=/usr/share/afni-datasets/ \
+    PATH="/opt/miniconda-latest/envs/dsstdeface/bin:/opt/miniconda-latest/bin:$PATH"
+RUN \
+    # Install dependencies.
+    export PATH="/opt/miniconda-latest/bin:$PATH" \
+    && echo "Downloading Miniconda installer ..." \
+    && conda_installer="/tmp/miniconda.sh" \
+    && curl -fsSL -o "$conda_installer" https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \
+    && bash "$conda_installer" -b -p /opt/miniconda-latest \
+    && rm -f "$conda_installer" \
+    && conda update -yq -nbase conda \
+    # Prefer packages in conda-forge
+    && conda config --system --prepend channels conda-forge \
+    # Packages in lower-priority channels not considered if a package with the same
+    # name exists in a higher priority channel. Can dramatically speed up installations.
+    # Conda recommends this as a default
+    # https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-channels.html
+    && conda config --set channel_priority strict \
+    && conda config --system --set auto_update_conda false \
+    && conda config --system --set show_channel_urls true \
+    # Enable `conda activate`
+    && conda init bash \
+    && conda env create --name dsstdeface --file /opt/environment.yml \
+    # Clean up
+    && sync && conda clean --all --yes && sync \
+    && rm -rf ~/.cache/pip/*
+RUN test "$(getent passwd dsst)" \
+    || useradd --no-user-group --create-home --shell /bin/bash dsst
+USER dsst
+ENTRYPOINT ["/opt/dsst-defacing-pipeline/run.py"]
+
+# Save specification to JSON.
+USER root +RUN printf '{ \ + "pkg_manager": "portage", \ + "existing_users": [ \ + "root" \ + ], \ + "instructions": [ \ + { \ + "name": "from_", \ + "kwds": { \ + "base_image": "docker.io/gentoo/portage:20240324 as portage" \ + } \ + }, \ + { \ + "name": "from_", \ + "kwds": { \ + "base_image": "docker.io/gentoo/stage3:20240318" \ + } \ + }, \ + { \ + "name": "run", \ + "kwds": { \ + "command": "COPY --from=portage /var/db/repos/gentoo /var/db/repos/gentoo\\nRUN emerge -v --noreplace dev-vcs/git\\nRUN emerge -v1u portage\\n# Pinned commits for the dependency tree state\\nARG gentoo_hash=2d25617a1d085316761b06c17a93ec972f172fc6\\nARG science_hash=73916dd3680ffd92e5bd3d32b262e5d78c86a448\\nARG FEATURES=\\"-ipc-sandbox -network-sandbox -pid-sandbox\\"\\n# This will be bound, and contents available outside of container\\nRUN mkdir /outputs\\nCOPY gentoo-portage/ /etc/portage/\\n# Moving gentoo repo from default rsync to git\\nRUN rm /var/db/repos/gentoo -rf\\n# Cloning manually to prevent vdb update, pinning state via git\\n# Allegedly it'"'"'s better to chain everything in one command, something with container layers \\ud83e\\udd14\\nRUN \\\\\\n REPO_URL=$\(grep \\"^sync-uri\\" /etc/portage/repos.conf/gentoo | sed -e \\"s/sync-uri *= *//g\\"\) && \\\\\\n mkdir -p /var/db/repos/gentoo && pushd /var/db/repos/gentoo && git init . && \\\\\\n git remote add origin ${REPO_URL} && \\\\\\n git fetch --filter=\\"blob:none\\" origin $gentoo_hash && \\\\\\n git reset --hard $gentoo_hash && rm .git -rf && popd && \\\\\\n REPO_URL=$\(grep \\"^sync-uri\\" /etc/portage/repos.conf/science | sed -e \\"s/sync-uri *= *//g\\"\) && \\\\\\n mkdir -p /var/db/repos/science && pushd /var/db/repos/science && git init . 
&& \\\\\\n git remote add origin ${REPO_URL} && \\\\\\n git fetch --filter=\\"blob:none\\" origin $science_hash && \\\\\\n git reset --hard $science_hash && rm .git -rf && popd\\n# Old Christian: Remove sync-uri to not accidentally re-sync if we work with the package management interactively\\n# Christian from the future: Maybe we want the option to re-sync if we'"'"'re debugging it interactively...\\n#RUN sed -i /etc/portage/repos.conf/{gentoo,science} -e \\"s/sync-type *= *git/sync-type =/g\\"\\n#RUN sed -i /etc/portage/repos.conf/{gentoo,science} -e \\"/sync-uri/d\\"\\n#RUN sed -i /etc/portage/repos.conf/{gentoo,science} -e \\"/sync-git-verify-commit-signature/d\\"\\n# Make sure all CPU flags supported by the hardware are whitelisted\\n# This only affects packages with handwritten assembly language optimizations, e.g. ffmpeg.\\n# Removing it is safe, software will just not take full advantage of processor capabilities.\\n#RUN emerge cpuid2cpuflags\\n#RUN echo \\"*/* $\(cpuid2cpuflags\)\\" > /etc/portage/package.use/00cpu-flags\\n### Emerge cool stuff here\\n### Autounmask-continue enables all features on dependencies which the top level packages require\\n### By default this needs user confirmation which would interrupt the build." 
\ + } \ + }, \ + { \ + "name": "install", \ + "kwds": { \ + "pkgs": [ \ + "afni", \ + "fsl" \ + ], \ + "opts": null \ + } \ + }, \ + { \ + "name": "run", \ + "kwds": { \ + "command": "emerge --autounmask-continue \\\\\\n afni \\\\\\n fsl \\\\\\n && rm -rf /var/tmp/portage/*" \ + } \ + }, \ + { \ + "name": "copy", \ + "kwds": { \ + "source": [ \ + "environment.yml", \ + "/opt/environment.yml" \ + ], \ + "destination": "/opt/environment.yml" \ + } \ + }, \ + { \ + "name": "copy", \ + "kwds": { \ + "source": [ \ + "src", \ + "/opt/dsst-defacing-pipeline" \ + ], \ + "destination": "/opt/dsst-defacing-pipeline" \ + } \ + }, \ + { \ + "name": "env", \ + "kwds": { \ + "CONDA_DIR": "/opt/miniconda-latest", \ + "PATH": "/opt/miniconda-latest/bin:$PATH" \ + } \ + }, \ + { \ + "name": "run", \ + "kwds": { \ + "command": "\\n# Install dependencies.\\nexport PATH=\\"/opt/miniconda-latest/bin:$PATH\\"\\necho \\"Downloading Miniconda installer ...\\"\\nconda_installer=\\"/tmp/miniconda.sh\\"\\ncurl -fsSL -o \\"$conda_installer\\" https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh\\nbash \\"$conda_installer\\" -b -p /opt/miniconda-latest\\nrm -f \\"$conda_installer\\"\\nconda update -yq -nbase conda\\n# Prefer packages in conda-forge\\nconda config --system --prepend channels conda-forge\\n# Packages in lower-priority channels not considered if a package with the same\\n# name exists in a higher priority channel. 
Can dramatically speed up installations.\\n# Conda recommends this as a default\\n# https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-channels.html\\nconda config --set channel_priority strict\\nconda config --system --set auto_update_conda false\\nconda config --system --set show_channel_urls true\\n# Enable `conda activate`\\nconda init bash\\nconda env create --name dsstdeface --file /opt/environment.yml\\n# Clean up\\nsync && conda clean --all --yes && sync\\nrm -rf ~/.cache/pip/*" \ + } \ + }, \ + { \ + "name": "user", \ + "kwds": { \ + "user": "dsst" \ + } \ + }, \ + { \ + "name": "entrypoint", \ + "kwds": { \ + "args": [ \ + "/opt/dsst-defacing-pipeline/run.py" \ + ] \ + } \ + } \ + ] \ +}' > /.reproenv.json +USER dsst +# End saving to specification to JSON. diff --git a/generate_container.sh b/generate_container.sh new file mode 100755 index 0000000..de045f9 --- /dev/null +++ b/generate_container.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +set -eu + +generate() { + # more details might come on https://github.com/ReproNim/neurodocker/issues/330 + [ "$1" == singularity ] && add_entry=' "$@"' || add_entry='' + #neurodocker generate "$1" \ + #ndversion=0.9.5 + #ndversion=master + #docker run --rm repronim/neurodocker:$ndversion \ + # ATM needs devel version of neurodocker for a fix to AFNI recipe + #--base-image neurodebian:bookworm \ + #--ndfreeze date=20240320 \ + dest=/opt/dsst-defacing-pipeline + neurodocker \ + generate "$1" \ + --pkg-manager portage \ + --base-image "docker.io/gentoo/portage:20240324 as portage" \ + --base-image "docker.io/gentoo/stage3:20240318" \ + --gentoo gentoo_hash=2d25617a1d085316761b06c17a93ec972f172fc6 \ + --install afni fsl \ + --copy environment.yml /opt/environment.yml \ + --copy src "$dest" \ + --miniconda \ + version=latest \ + env_name=dsstdeface \ + env_exists=false \ + yaml_file=/opt/environment.yml \ + --user=dsst \ + --entrypoint "$dest/run.py" + #--run "curl -sL https://deb.nodesource.com/setup_16.x | bash 
- " \ + #--install nodejs npm \ + #--run "npm install -g bids-validator@1.14.4" \ + #--fsl version=6.0.7.1 \ +} + +generate docker > Dockerfile +# generate singularity > Singularity diff --git a/src/deface.py b/src/deface.py index 148673c..475b17e 100644 --- a/src/deface.py +++ b/src/deface.py @@ -9,10 +9,10 @@ import register -def run_command(cmdstr, logfile): +def run_command(cmdstr, logfile, check : bool=True): if not logfile: logfile = subprocess.PIPE - subprocess.run(cmdstr, stdout=logfile, stderr=subprocess.STDOUT, encoding='utf8', shell=True) + subprocess.run(cmdstr, stdout=logfile, stderr=subprocess.STDOUT, encoding='utf8', shell=True, check=check) def rename_afni_workdir(workdir_path): @@ -63,7 +63,7 @@ def compress_to_gz(input_file, output_file): def copy_over_sidecar(scan_filepath, input_anat_dir, output_anat_dir): - prefix = '_'.join([i for i in re.split('_|\.', scan_filepath.name) if i not in ['defaced', 'nii', 'gz']]) + prefix = '_'.join([i for i in re.split(r'_|\.', scan_filepath.name) if i not in ['defaced', 'nii', 'gz']]) filename = prefix + '.json' json_sidecar = input_anat_dir / filename shutil.copy2(json_sidecar, output_anat_dir / filename) @@ -173,8 +173,7 @@ def run_afni_refacer(primary_t1, others, subj_input_dir, sess_dir, output_dir): # afni refacer commands refacer_cmd = f"@afni_refacer_run -input {primary_t1} -mode_deface -no_clean -prefix {fspath(subj_outdir / prefix)}" - # TODO remove module load afni - full_cmd = f"module load afni ; export OMP_NUM_THREADS=1 ; {refacer_cmd}" + full_cmd = f"export OMP_NUM_THREADS=1 ; {refacer_cmd}" # TODO make log text less ugly; perhaps in a separate function log_filename = subj_outdir / 'defacing_pipeline.log' diff --git a/src/generate_mappings.py b/src/generate_mappings.py index c4c8673..a7b4473 100644 --- a/src/generate_mappings.py +++ b/src/generate_mappings.py @@ -1,4 +1,4 @@ -#!/usr/local/bin/python3 +#!/usr/bin/env python3 """Generates Primary to "others" mapping file and prints VisualQC's T1 MRI 
utility command. diff --git a/src/prepare_shareable.py b/src/prepare_shareable.py index dd5297e..a3d3ac7 100644 --- a/src/prepare_shareable.py +++ b/src/prepare_shareable.py @@ -1,4 +1,4 @@ -#!/usr/local/bin/python3 +#!/usr/bin/env python3 import argparse import subprocess diff --git a/src/prepare_to_share.py b/src/prepare_to_share.py index ac21ee7..93936f4 100644 --- a/src/prepare_to_share.py +++ b/src/prepare_to_share.py @@ -1,4 +1,4 @@ -#!/usr/local/bin/python3 +#!/usr/bin/env python3 import argparse import subprocess diff --git a/src/register.py b/src/register.py index ed9bd03..56fd778 100644 --- a/src/register.py +++ b/src/register.py @@ -11,16 +11,13 @@ def preprocess_facemask(fmask_path, logfile_obj): prefix = fmask_path.parent.joinpath('afni_facemask') defacemask = fmask_path.parent.joinpath('afni_defacemask.nii.gz') - # load fsl module - c0 = f"module load fsl" - # split the 4D volume c1 = f"fslroi {fmask_path} {prefix} 1 1" # arithmetic on the result from above c2 = f"fslmaths {prefix}.nii.gz -abs -binv {defacemask}" print(f"Generating a defacemask... \n ") - run_command('; '.join([c0, c1, c2]), logfile_obj) + run_command('; '.join([c1, c2]), logfile_obj) try: if defacemask.exists(): return defacemask @@ -68,7 +65,7 @@ def register_to_primary_scan(subj_dir, afni_workdir, primary_scan, other_scans_l mask_cmd = f"fslmaths {other} -mas {other_mask} {other_defaced}" - full_cmd = " ; ".join(["module load fsl", cp_cmd, flirt_cmd, applyxfm_cmd, mask_cmd]) + '\n' + full_cmd = " ; ".join([cp_cmd, flirt_cmd, applyxfm_cmd, mask_cmd]) + '\n' print(f"Registering {other.name} to {primary_scan.name} and applying defacemask...") run_command(full_cmd, log_fileobj) diff --git a/src/run.py b/src/run.py old mode 100644 new mode 100755 index 9209b49..09a77fe --- a/src/run.py +++ b/src/run.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python + import argparse import json import re