Skip to content

Commit a53b165

Browse files
author
Timothy Le
committed
example of using isaac with reachy
Signed-off-by: Timothy Le <tle@nebius.com>
1 parent 8d804cb commit a53b165

File tree

1 file changed

+83
-0
lines changed

1 file changed

+83
-0
lines changed
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
# Workflow: a small CLI task installs the Nebius CLI, submits an Isaac Lab RL
# training job to Nebius, then polls the job until it reaches a terminal state.
#
# NOTE(review): key nesting follows the apparent structure
# (workflow -> resources/tasks) -- confirm against the OSMO workflow spec.
workflow:
  name: osmo-isaac-reachy-rl
  pool: default

  resources:
    nebius-cli:
      cpu: 1
      memory: 2Gi
      platform: L40S

  tasks:
    - name: trigger-reachy-training
      image: ubuntu:22.04
      resource: nebius-cli

      # Values containing XXX (and the NEBIUS_IAM_TOKEN value) are placeholders
      # that must be replaced before running.
      # NOTE(review): prefer injecting NEBIUS_IAM_TOKEN / NGC_API_KEY from a
      # secret store instead of committing real values here.
      environment:
        SUBNET_ID: "vpcsubnet-XXX"
        PARENT_ID: "project-XXX"
        FILESYSTEM_ID: "computefilesystem-XXX"
        NEBIUS_IAM_TOKEN: "NEBIUS_IAM_TOKEN"
        NGC_API_KEY: "nvapi-XXX"
        # RL training parameters
        TASK: "Isaac-Reach-Franka-v0" # swap for Isaac-Reach-Reachy-v0 when extension is installed
        NUM_ENVS: "4096"
        MAX_ITERATIONS: "500"

      command:
        - bash
        - -lc
      args:
        - |
          set -euo pipefail

          echo "--- 1. Install Nebius CLI ---"
          # Separate commands on purpose: in `a && b`, a failure of `a` is
          # exempt from `set -e`, so a failed update would go unnoticed.
          apt-get update
          apt-get install -y curl bash jq

          # -f: fail on HTTP errors instead of piping an error page into bash.
          curl -fsSL https://storage.eu-north1.nebius.cloud/cli/install.sh | bash

          export PATH="$PATH:/root/.nebius/bin"
          nebius version

          echo "--- 2. Configure Nebius credentials ---"
          mkdir -p /root/.nebius
          # endpoint and parent-id must be nested under the "default" profile,
          # hence the explicit 2/4-space indentation in the format string.
          printf 'current-profile: default\nprofiles:\n  default:\n    endpoint: api.eu-north1.nebius.cloud:443\n    parent-id: %s\n' \
            "$PARENT_ID" > /root/.nebius/config.yaml

          echo "--- 3. Launch Isaac Lab training job ---"
          JOB_NAME="reachy-train-$(date +%s)"

          # Command executed inside the training container (via `bash -c`).
          TRAIN_CMD="/workspace/isaaclab/isaaclab.sh -p /workspace/isaaclab/scripts/reinforcement_learning/rsl_rl/train.py"
          TRAIN_CMD+=" --task $TASK --headless --num_envs $NUM_ENVS --max_iterations $MAX_ITERATIONS"
          TRAIN_CMD+=" +runner.log_root_path=/mnt/storage/logs 2>&1"

          # "\$oauthtoken" is passed as the literal string $oauthtoken.
          JOB_OUTPUT=$(nebius ai job create \
            --name "$JOB_NAME" \
            --image "nvcr.io/nvidia/isaac-lab:2.3.2" \
            --registry-username "\$oauthtoken" \
            --registry-password "$NGC_API_KEY" \
            --platform "gpu-l40s-d" \
            --preset "1gpu-16vcpu-96gb" \
            --volume "$FILESYSTEM_ID:/mnt/storage" \
            --container-command "bash" \
            --args "-c '${TRAIN_CMD}'" \
            --timeout "120m" \
            --subnet-id "$SUBNET_ID" \
            --parent-id "$PARENT_ID")
          echo "Job create response: $JOB_OUTPUT"

          # `|| true` keeps a no-match grep from aborting via pipefail/set -e,
          # so the explicit check below can report what went wrong.
          JOB_ID=$(grep -oE 'aijob-[a-z0-9]+' <<<"$JOB_OUTPUT" | head -n 1 || true)
          if [[ -z "$JOB_ID" ]]; then
            echo "ERROR: no job ID (aijob-...) found in the job create response" >&2
            exit 1
          fi
          echo "Submitted training job: $JOB_ID"

          echo "--- 4. Wait for training completion ---"
          POLL_INTERVAL_SECONDS=60
          # Upper bound on polling so an unrecognised status cannot spin forever.
          # Default: the 120m job timeout plus 30m of slack; override via env.
          MAX_WAIT_SECONDS="${MAX_WAIT_SECONDS:-9000}"
          SECONDS=0 # bash built-in counter of elapsed seconds

          while true; do
            STATUS=""
            if JOB_RAW=$(nebius ai job get --id "$JOB_ID" --format json 2>&1); then
              # Prefer structured parsing; if jq cannot parse the output, fall
              # back to scanning the raw text for a known status keyword.
              if ! STATUS=$(jq -r '.status.phase // .status.state // empty' <<<"$JOB_RAW" 2>/dev/null); then
                STATUS=$(grep -oE '(RUNNING|SUCCEEDED|COMPLETED|FAILED|ERROR|CANCELLED|STARTING|PENDING)' <<<"$JOB_RAW" | head -n 1 || true)
              fi
            else
              # A transient CLI/API failure should not kill the watcher; retry.
              echo "WARNING: could not query job status, will retry: $JOB_RAW" >&2
            fi
            echo "Training job status: $STATUS"

            case "$STATUS" in
              SUCCEEDED|COMPLETED)
                echo "Training completed successfully."
                echo "Logs and checkpoints saved to: /mnt/storage/logs/$TASK/"
                break
                ;;
              FAILED|ERROR|CANCELLED)
                echo "Training failed with status: $STATUS"
                # Best effort: a failing log fetch must not mask the exit below.
                nebius ai job logs "$JOB_ID" 2>&1 | tail -n 50 || true
                exit 1
                ;;
              *)
                if (( SECONDS >= MAX_WAIT_SECONDS )); then
                  echo "ERROR: timed out after ${MAX_WAIT_SECONDS}s waiting for job $JOB_ID (last status: ${STATUS:-unknown})" >&2
                  exit 1
                fi
                sleep "$POLL_INTERVAL_SECONDS"
                ;;
            esac
          done

0 commit comments

Comments
 (0)