Skip to content

Commit fd58f98

Browse files
committed
Merge branch 'dev' into beta
2 parents 06e0a4e + 4367328 commit fd58f98

File tree

12 files changed

+145
-29
lines changed

12 files changed

+145
-29
lines changed

src/lib/core/docker/Dockerfile.project-core-final

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,22 @@ RUN <<EOF
2121
source /dn_project_core_finalize.bash \
2222
|| n2st::print_msg_error_and_exit "Failed dn_project_core_finalize.bash!"
2323

24-
# Cleanup buidl script
24+
# ....Project directories ownership............................................................
25+
n2st::print_msg "Set project directories ownership"
26+
{
27+
chown -R $(id -u ${DN_PROJECT_USER:?err}):$(id -g ${DN_PROJECT_USER}) ${DN_PROJECT_USER_HOME:?err}
28+
chown -R $(id -u ${DN_PROJECT_USER}):$(id -g ${DN_PROJECT_USER}) ${DN_PROJECT_PATH:?err}
29+
chown -R $(id -u ${DN_PROJECT_USER}):$(id -g ${DN_PROJECT_USER}) ${DN_DEV_WORKSPACE:?err}
30+
} || {
31+
# Collect debug information on failure
32+
pwd
33+
tree -agu
34+
tree -agu ${DN_PROJECT_USER_HOME}
35+
tree -agu ${DN_DEV_WORKSPACE}
36+
exit 1
37+
}
38+
39+
# ....Cleanup build script.....................................................................
2540
rm -f /dn_project_core_init.bash
2641
rm -f /dna-lib-container-tools/dn_project_core.setup.bash
2742
rm -f /dna-lib-container-tools/dn_project_core.build.aarch_aware_build_ros.bash

src/lib/core/docker/docker-compose.project.run.ci-tests.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,9 @@ services:
1515
DN_ACTIVATE_POWERLINE_PROMT: false
1616
DN_ENTRYPOINT_TRACE_EXECUTION: ${DN_ENTRYPOINT_TRACE_EXECUTION:-false}
1717
IS_TEAMCITY_RUN: ${IS_TEAMCITY_RUN:-false}
18-
NVIDIA_VISIBLE_DEVICES: ${NVIDIA_VISIBLE_DEVICES:-all}
18+
NVIDIA_VISIBLE_DEVICES: ${NVIDIA_VISIBLE_DEVICES:-void}
1919
# see https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/user-guide.html#environment-variables-oci-spec
20-
NVIDIA_DRIVER_CAPABILITIES: ${NVIDIA_DRIVER_CAPABILITIES:-all}
20+
NVIDIA_DRIVER_CAPABILITIES: ${NVIDIA_DRIVER_CAPABILITIES:-}
2121
# see https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/user-guide.html#driver-capabilities
2222
QT_X11_NO_MITSHM: 1
2323
XAUTHORITY: /tmp/.docker.xauth

src/lib/core/docker/docker-compose.project.run.jetson.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,9 @@ services:
1818
DN_CONTAINER_NAME: ${DN_CONTAINER_NAME:?err}
1919
DN_ACTIVATE_POWERLINE_PROMT: ${DN_ACTIVATE_POWERLINE_PROMT:-false}
2020
IS_TEAMCITY_RUN: ${IS_TEAMCITY_RUN:-false}
21-
NVIDIA_VISIBLE_DEVICES: ${NVIDIA_VISIBLE_DEVICES:-all}
21+
NVIDIA_VISIBLE_DEVICES: ${NVIDIA_VISIBLE_DEVICES:-void}
2222
# see https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/user-guide.html#environment-variables-oci-spec
23-
NVIDIA_DRIVER_CAPABILITIES: ${NVIDIA_DRIVER_CAPABILITIES:-all}
23+
NVIDIA_DRIVER_CAPABILITIES: ${NVIDIA_DRIVER_CAPABILITIES:-}
2424
# see https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/user-guide.html#driver-capabilities
2525
DISPLAY: ${DISPLAY:-":0"}
2626
QT_X11_NO_MITSHM: 1

src/lib/core/docker/docker-compose.project.run.linux-x86.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,9 @@ services:
3939
DN_CONTAINER_NAME: ${DN_CONTAINER_NAME:?err}
4040
DN_ACTIVATE_POWERLINE_PROMT: ${DN_ACTIVATE_POWERLINE_PROMT:-false}
4141
IS_TEAMCITY_RUN: ${IS_TEAMCITY_RUN:-false}
42-
NVIDIA_VISIBLE_DEVICES: ${NVIDIA_VISIBLE_DEVICES:-all}
42+
NVIDIA_VISIBLE_DEVICES: ${NVIDIA_VISIBLE_DEVICES:-void}
4343
# see https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/user-guide.html#environment-variables-oci-spec
44-
NVIDIA_DRIVER_CAPABILITIES: ${NVIDIA_DRIVER_CAPABILITIES:-all}
44+
NVIDIA_DRIVER_CAPABILITIES: ${NVIDIA_DRIVER_CAPABILITIES:-}
4545
# see https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/user-guide.html#driver-capabilities
4646
DISPLAY: ${DISPLAY:-":0"}
4747
QT_X11_NO_MITSHM: 1

src/lib/core/execute/run.slurm.bash

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ function dna::run_slurm_teardown_callback() {
6969
local compose_path="${DNA_ROOT:?err}/src/lib/core/docker"
7070
local the_compose_file=docker-compose.project.run.slurm.yaml
7171
local running_container_ids
72+
source "${DNA_LIB_PATH:?err}/core/utils/load_super_project_config.bash"
7273
running_container_ids=$(docker compose -f "${compose_path}/${the_compose_file}" ps --quiet --all --orphans=false)
7374
if [[ -n ${running_container_ids} ]]; then
7475
for each_id in "${running_container_ids[@]}"; do
@@ -253,11 +254,13 @@ function dna::run_slurm() {
253254
n2st::print_msg "Execute docker ${MSG_DIMMED_FORMAT}${docker_log[*]}${MSG_END_FORMAT}"
254255
# Note: Operator "2>&1 |" redirect both stdin and stderr (portable version of "|&")
255256
docker "${docker_log[@]}" "${container_id}" 2>&1 | tee "${SUPER_PROJECT_ROOT:?err}/${log_path}/${log_name}.log"
257+
exit_code=$?
258+
else
259+
echo && n2st::print_msg "Execute ${MSG_DIMMED_FORMAT}docker wait${MSG_END_FORMAT}"
260+
docker wait "${container_id}" # Required if docker logs is skipped
261+
#docker compose "${compose_flags[@]}" wait "${the_service}" # Required if docker logs is skipped
262+
exit_code=$?
256263
fi
257-
258-
echo && n2st::print_msg "Execute ${MSG_DIMMED_FORMAT}docker wait${MSG_END_FORMAT}"
259-
docker wait "${container_id}" # Required if docker logs is skipped
260-
exit_code=$?
261264
fi
262265

263266
# ....Teardown...................................................................................

src/lib/core/utils/cuda_tools.bash

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,7 @@ function dna::configure_gpu_capabilities() {
196196
fi
197197

198198
# ....Begin......................................................................................
199-
if [[ $NVIDIA_VISIBLE_DEVICES == void ]]; then
199+
if [[ -z $NVIDIA_VISIBLE_DEVICES ]] || [[ $NVIDIA_VISIBLE_DEVICES == void ]]; then
200200
n2st::print_msg "No nvidia gpu support expected by user"
201201
NVIDIA_VISIBLE_DEVICES=void
202202
NVIDIA_DRIVER_CAPABILITIES=""

src/lib/template/.dockerized_norlab/configuration/.env

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -12,35 +12,36 @@
1212

1313
# ....GPU..........................................................................................
1414
# Reference: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/docker-specialized.html
15-
NVIDIA_VISIBLE_DEVICES=all
16-
# Options: "all", "none", "void", gpu UUID e.g., "GPU-fef8089b or gpu indexes list e.g., "0,1,2"
1715

18-
NVIDIA_DRIVER_CAPABILITIES=all
19-
# Options: "all" or a list of comma-separated driver's name e.g., "compute,utility"
16+
# Nvidia visible devices options: "all", "none", "void", gpu UUID e.g., "GPU-fef8089b" or gpu indexes list e.g., "0,1,2"
17+
#NVIDIA_VISIBLE_DEVICES=all
18+
19+
# Nvidia driver capabilities options: "all" or a comma-separated list of driver names e.g., "compute,utility"
2020
# Available driver: compute, compat32, graphics, utility, video or display
21+
#NVIDIA_DRIVER_CAPABILITIES=all
2122

2223
# ....ROS..........................................................................................
23-
ROS_DOMAIN_ID=1
24+
#ROS_DOMAIN_ID=1
2425

2526
# Enable ROS2 log colouring
26-
RCUTILS_COLORIZED_OUTPUT=1
27+
#RCUTILS_COLORIZED_OUTPUT=1
2728

28-
RMW_IMPLEMENTATION=rmw_cyclonedds_cpp
29-
# Option: rmw_fastrtps_cpp, rmw_cyclonedds_cpp
29+
# rmw implementation options: rmw_fastrtps_cpp, rmw_cyclonedds_cpp
30+
#RMW_IMPLEMENTATION=rmw_cyclonedds_cpp
3031

31-
DDS_NETWORK_INTERFACE=eth0
32-
CYCLONEDDS_URI="<CycloneDDS><Domain><General><NetworkInterface>${DDS_NETWORK_INTERFACE:?err}</></></></>"
32+
#DDS_NETWORK_INTERFACE=eth0
33+
#CYCLONEDDS_URI="<CycloneDDS><Domain><General><NetworkInterface>${DDS_NETWORK_INTERFACE:?err}</></></></>"
3334
# Fix for warning "ros2: using network interface eth0 (udp/169.254.205.89) selected
3435
# arbitrarily from: eth0, wlan0, docker0".
3536
# Solution ref: https://answers.ros.org/question/375360/multiple-network-interfaces-with-rmw_cyclonedds_cpp/
3637

3738
# ....Python.......................................................................................
38-
PYTHONUNBUFFERED=1
39-
PYCHARM_DEBUG=1
39+
#PYTHONUNBUFFERED=1
40+
#PYCHARM_DEBUG=1
4041
#PYTEST_DEBUG=1
4142

4243
# ....Hydra........................................................................................
43-
HYDRA_FULL_ERROR=1
44+
#HYDRA_FULL_ERROR=1
4445

4546
# Set omegaconf full error backtrace
46-
OC_CAUSE=1
47+
#OC_CAUSE=1

src/lib/template/slurm_jobs/slurm_job.hydra_template.bash

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ hydra_flags+=("launcher/example_app.py")
5858
# ....Debug flags..................................................................................
5959
dna_run_slurm_flags+=(--register-hydra-dry-run-flag "+new_key='fake-value'")
6060

61-
#dna_run_slurm_flags+=("--skip-core-force-rebuild")
61+
dna_run_slurm_flags+=("--skip-core-force-rebuild")
6262
#dna_run_slurm_flags+=("--dry-run")
6363
#hydra_flags+=("--cfg" "all")
6464

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
#!/bin/bash
2+
#SBATCH --gres=gpu:1
3+
#SBATCH --cpus-per-task=12
4+
#SBATCH --time=7-00:00
5+
#SBATCH --output=out/%x-%j.out
6+
7+
8+
# Note: Flag time format --time=D-HH:MM -> D=day, HH=hours, MM=minutes
9+
10+
# =================================================================================================
11+
# Execute slurm job
12+
#
13+
# Usage:
14+
# $ bash slurm_job.template.bash [<any-dna-argument>]
15+
#
16+
# =================================================================================================
17+
declare -x SJOB_ID
18+
declare -a dna_run_slurm_flags=()
19+
declare -a hydra_flags=()
20+
21+
# ====Setup========================================================================================
22+
# ....Custom setup (optional)......................................................................
23+
function dna::job_setup_callback() {
24+
# TODO: Add any instruction that should be executed before 'dna run slurm' command
25+
:
26+
}
27+
28+
# ....Custom teardown (optional)...................................................................
29+
function dna::job_teardown_callback() {
30+
local exit_code=$?
31+
# TODO: Add any instruction that should be executed after 'dna run slurm' exit.
32+
33+
# Note: Command 'dna run slurm' already handles stopping the container in case the slurm command
34+
# `scancel` is issued.
35+
exit ${exit_code:-1}
36+
}
37+
38+
# ....Set job name.................................................................................
39+
# TODO: Set SJOB_ID
40+
SJOB_ID="default"
41+
# Note: Recommend opening an issue tracker task (e.g., YouTrack, GitHub issue, Trello)
42+
# and use its issue ID as an SJOB_ID.
43+
44+
# ....Hydra app module.............................................................................
45+
# TODO: Set python module to launch
46+
hydra_flags+=("launcher/example_app_hparm_optim.py")
47+
# Note: assume container workdir is `<super-project>/src/`
48+
49+
# ....Optional hydra flags.........................................................................
50+
# --config-path,-cp : Overrides the config_path specified in hydra.main(). (absolute or relative)
51+
# --config-name,-cn : Overrides the config_name specified in hydra.main()
52+
# --config-dir,-cd : Adds an additional config dir to the config search path
53+
54+
#hydra_flags+=("--config-path=")
55+
#hydra_flags+=("--config-dir=")
56+
#hydra_flags+=("--config-name=")
57+
58+
# ....Debug flags..................................................................................
59+
dna_run_slurm_flags+=(--register-hydra-dry-run-flag "+new_key='fake-value'")
60+
61+
dna_run_slurm_flags+=("--skip-core-force-rebuild")
62+
#dna_run_slurm_flags+=("--dry-run")
63+
#hydra_flags+=("--cfg" "all")
64+
65+
# ====DNA internal=================================================================================
66+
dna_run_slurm_flags+=("--log-name" "$(basename -s .bash $0)")
67+
dna_run_slurm_flags+=("--log-path" "artifact/slurm_jobs_logs")
68+
dna_run_slurm_flags+=("$@")
69+
export SJOB_ID
70+
dna::job_setup_callback
71+
trap dna::job_teardown_callback EXIT
72+
73+
# ====Launch slurm job=============================================================================
74+
dna version --all
75+
dna run slurm "${SJOB_ID:?err}" "${dna_run_slurm_flags[@]}" "${hydra_flags[@]}"
76+

src/lib/template/slurm_jobs/slurm_job.template.bash

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ python_arguments+=("launcher/example.py")
4747
# Note: assume container workdir is `<super-project>/src/`
4848

4949
# ....Debug flags..................................................................................
50-
#dna_run_slurm_flags+=("--skip-core-force-rebuild")
50+
dna_run_slurm_flags+=("--skip-core-force-rebuild")
5151
#dna_run_slurm_flags+=("--dry-run")
5252

5353
# ====DNA internal=================================================================================

0 commit comments

Comments
 (0)