|
| 1 | +#!/bin/bash |
| 2 | +#SBATCH --gres=gpu:1 |
| 3 | +#SBATCH --cpus-per-task=12 |
| 4 | +#SBATCH --time=7-00:00 |
| 5 | +#SBATCH --output=out/%x-%j.out |
| 6 | + |
| 7 | + |
| 8 | +# Note: Flag time format --time=D-HH:MM -> D=day, HH=hours, MM=minutes |
| 9 | + |
| 10 | +# ================================================================================================= |
| 11 | +# Execute slurm job |
| 12 | +# |
| 13 | +# Usage: |
| 14 | +# $ bash slurm_job.template.bash [<any-dna-argument>] |
| 15 | +# |
| 16 | +# ================================================================================================= |
# Job identifier. Marked for export up front; its value is assigned further down in the template.
export SJOB_ID
# Argument lists handed to 'dna run slurm'; both start empty and are filled in by the sections below.
declare -a dna_run_slurm_flags=() hydra_flags=()
| 20 | + |
| 21 | +# ====Setup======================================================================================== |
| 22 | +# ....Custom setup (optional)...................................................................... |
# Hook invoked by this script before the 'dna run slurm' command is launched.
# The template ships it as a no-op that reports success.
# Arguments: none
# Returns:   0
function dna::job_setup_callback() {
  # TODO: Add any instruction that should be executed before 'dna run slurm' command
  return 0
}
| 27 | + |
| 28 | +# ....Custom teardown (optional)................................................................... |
# Hook meant to run when the script exits (this template registers it with `trap ... EXIT`).
# It records the status of the last command executed before the hook was entered and terminates
# the shell with that same status, so custom teardown steps do not mask the job's result.
# Arguments: none
# Returns:   never returns; exits with the captured status (1 if the captured value is empty).
function dna::job_teardown_callback() {
  # Must stay the first statement: any earlier command would overwrite `$?`.
  local exit_code=$?
  # TODO: Add any instruction that should be executed after 'dna run slurm' exit.

  # Note: Command 'dna run slurm' already handles stopping the container in case the slurm command
  #  `scancel` is issued.
  exit "${exit_code:-1}"
}
| 37 | + |
# ....Set job name.................................................................................
# TODO: Set SJOB_ID
# Tip: open a task in your issue tracker (e.g., YouTrack, GitHub issue, Trello) and reuse its
#  issue ID as the SJOB_ID.
SJOB_ID='default'

# ....Hydra app module.............................................................................
# TODO: Set python module to launch
# Note: the container workdir is assumed to be `<super-project>/src/`
hydra_flags+=('launcher/example_app_hparm_optim.py')
| 48 | + |
| 49 | +# ....Optional hydra flags......................................................................... |
| 50 | +# --config-path,-cp : Overrides the config_path specified in hydra.main(). (absolute or relative) |
| 51 | +# --config-name,-cn : Overrides the config_name specified in hydra.main() |
| 52 | +# --config-dir,-cd : Adds an additional config dir to the config search path |
| 53 | + |
| 54 | +#hydra_flags+=("--config-path=") |
| 55 | +#hydra_flags+=("--config-dir=") |
| 56 | +#hydra_flags+=("--config-name=") |
| 57 | + |
# ....Debug flags..................................................................................
# Flags appended to the 'dna run slurm' argument list, in this order.
dna_run_slurm_flags+=(
  '--register-hydra-dry-run-flag' "+new_key='fake-value'"
  '--skip-core-force-rebuild'
)
# Uncomment to enable:
#dna_run_slurm_flags+=("--dry-run")
#hydra_flags+=("--cfg" "all")
| 64 | + |
# ====DNA internal=================================================================================
# Log name: this script's file name with the `.bash` suffix removed. `$0` is quoted so that a
# script path containing whitespace or glob characters reaches `basename` as a single argument.
# NOTE(review): when submitted through `sbatch`, `$0` may point to Slurm's spooled copy of the
#  script rather than the original file name -- confirm the resulting log name is the one wanted.
dna_run_slurm_flags+=("--log-name" "$(basename -s .bash "$0")")
dna_run_slurm_flags+=("--log-path" "artifact/slurm_jobs_logs")
# Arguments given to this script are appended after the flags set above.
dna_run_slurm_flags+=("$@")
export SJOB_ID
dna::job_setup_callback
# The trap is registered after the setup callback has run, so the teardown callback is not
# triggered if the setup callback itself exits the script.
trap dna::job_teardown_callback EXIT

# ====Launch slurm job=============================================================================
dna version --all
# `${SJOB_ID:?err}` aborts the script with message "err" when SJOB_ID is unset or empty.
dna run slurm "${SJOB_ID:?err}" "${dna_run_slurm_flags[@]}" "${hydra_flags[@]}"
| 76 | + |
0 commit comments