#!/bin/bash

# NOTE: To use this script, edit the lines labeled TODO below;
 # the accompanying NOTES explain how.
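#
# Usage: once the TODO lines are edited, submit with sbatch, e.g.
 # (assuming this file is saved as mriqc_array.sbatch):
 #   sbatch mriqc_array.sbatch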

# Set SBATCH Parameters:
# ------------------------------------------
# NOTE: These should work with Slurm HPC systems,
 # but these specific parameters have only been tested on
 # Stanford's Sherlock. Some parameters may need to be
 # adjusted for other HPCs, specifically --partition.
#SBATCH --job-name mriqc

# NOTE: Each HPC has different partitions for job submissions.
 # Check your HPC's documentation for what partitions are available
 # to you.
#SBATCH --partition normal #TODO: update for your HPC

# NOTE: The --array parameter allows multiple jobs to be launched at once,
 # and is generally recommended to efficiently run several hundred jobs
 # simultaneously. This should be adjusted to the range for your dataset:
 # 1-n%j, where n is the number of participants and j is the maximum
 # number of concurrent jobs. In this example, there are 216 participants,
 # and only 50 run at a given time.
#SBATCH --array=1-216%50 #TODO: adjust for your dataset
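# A quick way to find n for your dataset is to count the non-header
 # rows of participants.tsv (run this manually; SBATCH directives
 # cannot use runtime values), e.g.:
 #   echo $(( $(wc -l < /path/to/bids/participants.tsv) - 1 ))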

# NOTE: These parameters request time and resources from the job
 # scheduler. These specific parameters should be sufficient for
 # most datasets.
#SBATCH --time=1:00:00 #NOTE: likely longer than generally needed
#SBATCH --ntasks 1
#SBATCH --cpus-per-task=16
#SBATCH --mem-per-cpu=4G
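# NOTE: Once a few jobs have finished, actual usage can be checked
 # with sacct (e.g. `sacct -j <jobid> -o JobID,Elapsed,MaxRSS`) and
 # these requests tightened accordingly.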

# NOTE: These parameters set where log files will be written, and
 # where status emails will be sent.
#SBATCH --output log/%x-%A-%a.out
#SBATCH --error log/%x-%A-%a.err
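# NOTE: Slurm does not create the log directory for you; create it
 # in the submission directory (e.g. `mkdir -p log`) before
 # submitting, or the array jobs will fail to write their logs.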
#SBATCH --mail-user=%[email protected] #TODO: update to your email domain
#SBATCH --mail-type=ALL

## More information about these Slurm commands can be found at:
 # https://slurm.schedmd.com/sbatch.html
# ------------------------------------------
# Setup variables
# ------------------------------------------
# NOTE: These variables are paths to your data, and where you'd
 # like your output to be written. Set STUDY to the directory that
 # contains your BIDS dataset, and replace `ds002785` with the name
 # of your BIDS directory.
STUDY="/scratch/users/mphagen/mriqc-protocol" #TODO: replace with your path
BIDS_DIR="${STUDY}/ds002785" # TODO: replace with path to your dataset

# NOTE: These variables build the Apptainer command used to run your
 # MRIQC container.
MRIQC_VERSION="24.0.2" #TODO: update if using a different version
APPTAINER_CMD="apptainer run -e mriqc_${MRIQC_VERSION}.sif"
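
# NOTE: This assumes the container image mriqc_${MRIQC_VERSION}.sif
 # already exists in the submission directory. If it does not, it can
 # be built from the official Docker image, e.g.:
 #   apptainer build mriqc_24.0.2.sif docker://nipreps/mriqc:24.0.2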

OUTPUT_DIR="${BIDS_DIR}/derivatives/mriqc-${MRIQC_VERSION}"

# NOTE: The next two variables are used to extract participant IDs from
 # the mandatory participants.tsv. SLURM_ARRAY_TASK_ID is generated by
 # the --array parameter and is determined by each job's order - i.e.,
 # the first job has SLURM_ARRAY_TASK_ID=1, the second has
 # SLURM_ARRAY_TASK_ID=2, and so on. It is necessary to add 1 to
 # SLURM_ARRAY_TASK_ID because of the header in participants.tsv.
subject_idx=$(( ${SLURM_ARRAY_TASK_ID} + 1 ))
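# e.g., the job with SLURM_ARRAY_TASK_ID=1 gets subject_idx=2,
 # selecting the first row after the header of participants.tsv.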

## NOTE: The first clause in this line selects a row in participants.tsv
 # using subject_idx. This is piped to grep to isolate the subject id.
 # This regex should work for most subject naming conventions, but
 # may need to be modified in some cases.
subject=$( sed -n ${subject_idx}p ${BIDS_DIR}/participants.tsv \
           | grep -oP "sub-[A-Za-z0-9_]*" )
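# e.g., a (hypothetical) participants.tsv row "sub-0001  25  F"
 # would yield subject=sub-0001.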

echo Subject $subject

# Define the Apptainer command that will be run, with MRIQC CLI flags
cmd="${APPTAINER_CMD} ${BIDS_DIR} ${OUTPUT_DIR} participant \
  --participant-label $subject \
  -w $PWD/work/ \
  --omp-nthreads 10 --nprocs 12" # For nodes with at least 32GB RAM

# Print useful information to log files
echo Running task ${SLURM_ARRAY_TASK_ID}
echo Commandline: $cmd

# Run the full command defined in cmd
eval $cmd
exitcode=$?

# Print useful information to log files
echo "$subject ${SLURM_ARRAY_TASK_ID} $exitcode" \
  >> ${SLURM_ARRAY_JOB_ID}.tsv
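# e.g., a resulting row might read "sub-0001 1 0" - task 1 (subject
 # sub-0001) exited successfully. Note that $subject already includes
 # the sub- prefix, so no prefix is added here.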
echo Finished task ${SLURM_ARRAY_TASK_ID} with exit code $exitcode
exit $exitcode