
Commit 54abe29

Merge branch 'main' of https://github.com/pytorch/rl
2 parents: 8be545b + bec4498

38 files changed: +569 −186 lines

.github/workflows/build-wheels-aarch64-linux.yml

Lines changed: 13 additions & 0 deletions
@@ -12,6 +12,12 @@ on:
       # Release candidate tags look like: v1.11.0-rc1
       - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
   workflow_dispatch:
+    inputs:
+      build-cpu:
+        description: 'Build CPU wheels'
+        required: false
+        type: boolean
+        default: true
   workflow_call:
     inputs:
       test-infra-ref:
@@ -29,6 +35,11 @@ on:
         required: false
         type: string
         default: ''
+      with-cpu:
+        description: 'Build with CPU (enable/disable)'
+        required: false
+        type: string
+        default: 'enable'
 
 permissions:
   id-token: write
@@ -48,7 +59,9 @@ jobs:
       os: linux-aarch64
       test-infra-repository: pytorch/test-infra
       test-infra-ref: ${{ inputs.test-infra-ref || 'main' }}
+      # aarch64 only supports CPU builds
       with-cuda: disable
+      with-cpu: ${{ github.event_name == 'workflow_dispatch' && (inputs.build-cpu && 'enable' || 'disable') || inputs.with-cpu || 'enable' }}
       channel: ${{ inputs.channel || '' }}
       use-only-dl-pytorch-org: ${{ inputs.channel == 'release' && 'true' || 'false' }}
   build:

.github/workflows/build-wheels-linux.yml

Lines changed: 37 additions & 0 deletions
@@ -12,6 +12,22 @@ on:
       # Release candidate tags look like: v1.11.0-rc1
       - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
   workflow_dispatch:
+    inputs:
+      build-cpu:
+        description: 'Build CPU wheels'
+        required: false
+        type: boolean
+        default: true
+      build-cuda:
+        description: 'Build CUDA wheels'
+        required: false
+        type: boolean
+        default: false
+      build-rocm:
+        description: 'Build ROCm wheels'
+        required: false
+        type: boolean
+        default: false
   workflow_call:
     inputs:
       test-infra-ref:
@@ -29,6 +45,21 @@ on:
         required: false
         type: string
         default: ''
+      with-cuda:
+        description: 'Build with CUDA (enable/disable)'
+        required: false
+        type: string
+        default: 'enable'
+      with-rocm:
+        description: 'Build with ROCm (enable/disable)'
+        required: false
+        type: string
+        default: 'enable'
+      with-cpu:
+        description: 'Build with CPU (enable/disable)'
+        required: false
+        type: string
+        default: 'enable'
 
 permissions:
   id-token: write
@@ -50,6 +81,12 @@ jobs:
       test-infra-ref: ${{ inputs.test-infra-ref || 'main' }}
       channel: ${{ inputs.channel || '' }}
       use-only-dl-pytorch-org: ${{ inputs.channel == 'release' && 'true' || 'false' }}
+      # For workflow_dispatch: convert boolean to enable/disable string
+      # For workflow_call: use the string input directly
+      # Default: enable all variants
+      with-cuda: ${{ github.event_name == 'workflow_dispatch' && (inputs.build-cuda && 'enable' || 'disable') || inputs.with-cuda || 'enable' }}
+      with-rocm: ${{ github.event_name == 'workflow_dispatch' && (inputs.build-rocm && 'enable' || 'disable') || inputs.with-rocm || 'enable' }}
+      with-cpu: ${{ github.event_name == 'workflow_dispatch' && (inputs.build-cpu && 'enable' || 'disable') || inputs.with-cpu || 'enable' }}
   build:
     needs: generate-matrix
     strategy:
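
The `with-*` expressions above pack the dispatch/call logic into single lines of GitHub Actions expression syntax. As a reading aid only (not part of the workflow), the Python sketch below mirrors how `with-cuda` resolves; `resolve_with_cuda`, `event_name`, `build_cuda` and `with_cuda` are illustrative names standing in for `github.event_name`, `inputs.build-cuda` and `inputs.with-cuda`.

```python
# Illustrative mirror of the with-cuda expression in build-wheels-linux.yml.
def resolve_with_cuda(event_name: str, build_cuda: bool, with_cuda: str) -> str:
    if event_name == "workflow_dispatch":
        # checkbox boolean -> the 'enable'/'disable' string the matrix expects
        return "enable" if build_cuda else "disable"
    # workflow_call: pass the string input through; empty falls back to 'enable'
    return with_cuda or "enable"


assert resolve_with_cuda("workflow_dispatch", True, "") == "enable"
assert resolve_with_cuda("workflow_dispatch", False, "") == "disable"
assert resolve_with_cuda("workflow_call", False, "disable") == "disable"
assert resolve_with_cuda("workflow_call", False, "") == "enable"
```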

.github/workflows/build-wheels-m1.yml

Lines changed: 13 additions & 0 deletions
@@ -12,6 +12,12 @@ on:
       # Release candidate tags look like: v1.11.0-rc1
       - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
   workflow_dispatch:
+    inputs:
+      build-cpu:
+        description: 'Build CPU wheels'
+        required: false
+        type: boolean
+        default: true
   workflow_call:
     inputs:
       test-infra-ref:
@@ -29,6 +35,11 @@ on:
         required: false
         type: string
         default: ''
+      with-cpu:
+        description: 'Build with CPU (enable/disable)'
+        required: false
+        type: string
+        default: 'enable'
 
 permissions:
   id-token: write
@@ -50,6 +61,8 @@ jobs:
       test-infra-ref: ${{ inputs.test-infra-ref || 'main' }}
       channel: ${{ inputs.channel || '' }}
       use-only-dl-pytorch-org: ${{ inputs.channel == 'release' && 'true' || 'false' }}
+      # macOS only supports CPU builds
+      with-cpu: ${{ github.event_name == 'workflow_dispatch' && (inputs.build-cpu && 'enable' || 'disable') || inputs.with-cpu || 'enable' }}
   build:
     needs: generate-matrix
     strategy:

.github/workflows/build-wheels-windows.yml

Lines changed: 26 additions & 0 deletions
@@ -12,6 +12,17 @@ on:
       # Release candidate tags look like: v1.11.0-rc1
       - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
   workflow_dispatch:
+    inputs:
+      build-cpu:
+        description: 'Build CPU wheels'
+        required: false
+        type: boolean
+        default: true
+      build-cuda:
+        description: 'Build CUDA wheels'
+        required: false
+        type: boolean
+        default: false
   workflow_call:
     inputs:
       test-infra-ref:
@@ -29,6 +40,16 @@ on:
         required: false
         type: string
         default: ''
+      with-cuda:
+        description: 'Build with CUDA (enable/disable)'
+        required: false
+        type: string
+        default: 'enable'
+      with-cpu:
+        description: 'Build with CPU (enable/disable)'
+        required: false
+        type: string
+        default: 'enable'
 
 permissions:
   id-token: write
@@ -50,6 +71,11 @@ jobs:
       test-infra-ref: ${{ inputs.test-infra-ref || 'main' }}
       channel: ${{ inputs.channel || '' }}
       use-only-dl-pytorch-org: ${{ inputs.channel == 'release' && 'true' || 'false' }}
+      # For workflow_dispatch: convert boolean to enable/disable string
+      # For workflow_call: use the string input directly
+      # Default: enable all variants (Windows has no ROCm)
+      with-cuda: ${{ github.event_name == 'workflow_dispatch' && (inputs.build-cuda && 'enable' || 'disable') || inputs.with-cuda || 'enable' }}
+      with-cpu: ${{ github.event_name == 'workflow_dispatch' && (inputs.build-cpu && 'enable' || 'disable') || inputs.with-cpu || 'enable' }}
   build:
     needs: generate-matrix
     strategy:

.github/workflows/release.yml

Lines changed: 27 additions & 17 deletions
@@ -16,10 +16,10 @@
 # - main, nightly, PRs: install from git (latest dev)
 # - Can be overridden with tensordict_source input
 #
-# Wheel Variants:
-# - cpu (default): Recommended for torchrl - avoids duplicate filename conflicts
-# - gpu: Only CUDA builds
-# - all: All variants (with deduplication to prevent corruption)
+# Wheel Variants (selectable via checkboxes):
+# - build_cpu (default: true): Recommended for torchrl - pure Python library
+# - build_cuda (default: false): CUDA builds (Linux, Windows)
+# - build_rocm (default: false): ROCm builds (Linux only)
 #
 # NOTE: This workflow is NOT automatically triggered on tag push to avoid
 # race conditions with wheel builds. Use workflow_dispatch to trigger releases.
@@ -57,15 +57,21 @@ on:
           - 'stable'
           - 'git'
         default: 'auto'
-      wheel_variants:
-        description: 'Which wheel variants to collect (cpu recommended for torchrl)'
+      build_cpu:
+        description: 'Build CPU wheels (recommended for torchrl - pure Python library)'
         required: false
-        type: choice
-        options:
-          - 'cpu'
-          - 'gpu'
-          - 'all'
-        default: 'cpu'
+        type: boolean
+        default: true
+      build_cuda:
+        description: 'Build CUDA wheels'
+        required: false
+        type: boolean
+        default: false
+      build_rocm:
+        description: 'Build ROCm wheels (Linux only)'
+        required: false
+        type: boolean
+        default: false
 
 # Ensure only one release workflow runs at a time
 # cancel-in-progress: true means new runs cancel previous ones
@@ -259,6 +265,9 @@
       test-infra-ref: ${{ inputs.pytorch_release || 'main' }}
       tensordict-source: ${{ inputs.tensordict_source || 'auto' }}
       channel: release
+      with-cpu: ${{ inputs.build_cpu && 'enable' || 'disable' }}
+      with-cuda: ${{ inputs.build_cuda && 'enable' || 'disable' }}
+      with-rocm: ${{ inputs.build_rocm && 'enable' || 'disable' }}
     secrets: inherit
 
   build-windows:
@@ -271,6 +280,8 @@
       test-infra-ref: ${{ inputs.pytorch_release || 'main' }}
       tensordict-source: ${{ inputs.tensordict_source || 'auto' }}
       channel: release
+      with-cpu: ${{ inputs.build_cpu && 'enable' || 'disable' }}
+      with-cuda: ${{ inputs.build_cuda && 'enable' || 'disable' }}
     secrets: inherit
 
   build-macos:
@@ -283,6 +294,7 @@
      test-infra-ref: ${{ inputs.pytorch_release || 'main' }}
       tensordict-source: ${{ inputs.tensordict_source || 'auto' }}
       channel: release
+      with-cpu: ${{ inputs.build_cpu && 'enable' || 'disable' }}
     secrets: inherit
 
   build-aarch64:
@@ -295,6 +307,7 @@
       test-infra-ref: ${{ inputs.pytorch_release || 'main' }}
       tensordict-source: ${{ inputs.tensordict_source || 'auto' }}
       channel: release
+      with-cpu: ${{ inputs.build_cpu && 'enable' || 'disable' }}
     secrets: inherit
 
 # =============================================================================
@@ -330,12 +343,9 @@
         uses: actions/download-artifact@v4
         with:
           path: wheels-raw
-          # Pattern based on wheel_variants input:
-          # - cpu: Only CPU builds (recommended - avoids duplicate wheel conflicts)
-          # - gpu: Only CUDA builds
-          # - all: All variants (requires deduplication)
+          # Download all pytorch_rl artifacts - filtering by selected variants happens below
           # pytorch/test-infra uploads artifacts named like: pytorch_rl__3.11_cpu_x86_64
-          pattern: ${{ inputs.wheel_variants == 'gpu' && 'pytorch_rl__*_cu*' || inputs.wheel_variants == 'all' && 'pytorch_rl*' || 'pytorch_rl__*_cpu_*' }}
+          pattern: pytorch_rl*
           merge-multiple: true
 
       - name: Deduplicate and verify wheels
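
Because every `pytorch_rl*` artifact is now downloaded regardless of the selected variants, the follow-up step has to drop same-named duplicates before anything is published. The sketch below is only a hypothetical illustration of that dedup-and-verify idea, not the workflow's actual script; `dedup_wheels` and its directory arguments are made-up names.

```python
# Hypothetical sketch of "deduplicate and verify": keep one copy of each wheel
# filename and fail loudly if two same-named wheels have different contents.
import hashlib
import shutil
from pathlib import Path


def dedup_wheels(raw_dir: str, out_dir: str) -> None:
    out = Path(out_dir)
    out.mkdir(parents=True, exist_ok=True)
    seen: dict[str, str] = {}  # filename -> sha256 of the first copy seen
    for wheel in sorted(Path(raw_dir).rglob("*.whl")):
        digest = hashlib.sha256(wheel.read_bytes()).hexdigest()
        if wheel.name in seen:
            if seen[wheel.name] != digest:
                raise RuntimeError(f"conflicting copies of {wheel.name}")
            continue  # identical duplicate from another artifact, skip it
        seen[wheel.name] = digest
        shutil.copy2(wheel, out / wheel.name)
```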

knowledge_base/DM_CONTROL_INSTALLATION.md

Lines changed: 61 additions & 1 deletion
@@ -62,9 +62,69 @@ pip install dm-env dm-tree glfw lxml mujoco numpy pyopengl pyparsing scipy
 
 ### 2. EGL/rendering issues
 
-See [MUJOCO_INSTALLATION.md](./MUJOCO_INSTALLATION.md) for rendering-related 
+See [MUJOCO_INSTALLATION.md](./MUJOCO_INSTALLATION.md) for rendering-related
 issues, as dm_control uses MuJoCo for rendering.
 
+#### EGL multi-GPU device selection in containers (Docker / SLURM)
+
+When running `ParallelEnv` with pixel-based dm_control environments on a
+multi-GPU machine, all rendering contends on a **single GPU** — even if the
+host has 8 GPUs. This inflates per-worker render time by ~3x (e.g. 17ms serial
+→ 54ms with 8 workers sharing one GPU's EGL queue).
+
+**Root cause:** Inside Docker or SLURM containers, the NVIDIA container runtime
+only exposes the GPU(s) assigned to the job to EGL. `eglQueryDevicesEXT()`
+returns 1 device regardless of how many physical GPUs the host has.
+Setting `MUJOCO_EGL_DEVICE_ID` or `EGL_DEVICE_ID` to anything other than 0
+raises:
+
+```
+RuntimeError: MUJOCO_EGL_DEVICE_ID must be an integer between 0 and 0 (inclusive), got 1.
+```
+
+Unsetting `CUDA_VISIBLE_DEVICES` in the worker does **not** help — the
+container isolation happens at the NVIDIA driver/runtime level, below the
+environment variable.
+
+**Note on variable naming:** dm_control uses `MUJOCO_EGL_DEVICE_ID` internally
+(which maps to the same thing as MuJoCo's variable). Historically there was
+also `EGL_DEVICE_ID` used by older dm_control versions. See
+[dm_control#345](https://github.com/google-deepmind/dm_control/issues/345)
+for the unification discussion.
+
+**Upstream issues:**
+- [mujoco#572 — Cannot access all GPUs through EGL devices when using docker](https://github.com/google-deepmind/mujoco/issues/572)
+- [dm_control#345 — Unify EGL_DEVICE_ID with MUJOCO_EGL_DEVICE_ID](https://github.com/google-deepmind/dm_control/issues/345)
+
+**Workarounds:**
+
+1. **Configure container for full GPU access.** If you control the container
+   runtime, set `NVIDIA_VISIBLE_DEVICES=all` and
+   `NVIDIA_DRIVER_CAPABILITIES=all` so EGL can see all GPUs. Then assign
+   `MUJOCO_EGL_DEVICE_ID=<worker_idx % num_gpus>` per worker process
+   **before** dm_control is imported (the EGL display is created at import
+   time).
+
+2. **Run outside containers.** On bare metal, `eglQueryDevicesEXT()` correctly
+   returns all GPUs (plus the X server display, if any).
+
+3. **Reduce rendering overhead.** If multi-GPU rendering is not possible:
+   - Lower the rendering resolution (e.g. 64x64 instead of 84x84)
+   - Render at a lower frequency than the simulation step (frame-skip)
+   - Use state-only observations where possible — the IPC overhead is small
+     compared to rendering
+
+#### No batched rendering support in MuJoCo
+
+MuJoCo does not support batched GPU rendering — each environment renders its
+scene independently through its own OpenGL context. There is no API to submit
+multiple scenes to the GPU in one call.
+
+MuJoCo XLA (MJX) accelerates *simulation* on GPU via JAX but still requires
+copying data back to CPU for rendering through the standard `mujoco.Renderer`
+pipeline. See [mujoco#1604](https://github.com/google-deepmind/mujoco/issues/1604)
+for discussion on batched rendering support.
+
 ### 3. macOS ARM64 (Apple Silicon) specific issues
 
 On Apple Silicon Macs, ensure you're using native ARM Python, not Rosetta:
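
To make workaround 1 in the added documentation concrete, here is a minimal sketch (not part of the commit) of per-worker EGL device assignment with torchrl's `ParallelEnv`, assuming the container already exposes all GPUs (`NVIDIA_VISIBLE_DEVICES=all`); `NUM_GPUS` and `make_env` are illustrative names.

```python
import os
from functools import partial

NUM_GPUS = 8  # hypothetical GPU count on the host


def make_env(worker_idx: int):
    # Runs inside each ParallelEnv worker process. The env vars must be set
    # *before* dm_control is imported, since the EGL display is created at
    # import time.
    os.environ["MUJOCO_GL"] = "egl"
    os.environ["MUJOCO_EGL_DEVICE_ID"] = str(worker_idx % NUM_GPUS)

    from torchrl.envs import DMControlEnv  # import after the env vars are set

    return DMControlEnv("cheetah", "run", from_pixels=True)


if __name__ == "__main__":
    from torchrl.envs import ParallelEnv

    # One create_env_fn per worker so each worker gets its own EGL device id.
    env = ParallelEnv(8, [partial(make_env, i) for i in range(8)])
```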

sota-implementations/a2c/a2c_atari.py

Lines changed: 1 addition & 2 deletions
@@ -279,8 +279,7 @@ def update(batch, max_grad_norm=cfg.optim.max_grad_norm):
         if logger:
             metrics_to_log.update(timeit.todict(prefix="time"))
             metrics_to_log["time/speed"] = pbar.format_dict["rate"]
-            for key, value in metrics_to_log.items():
-                logger.log_scalar(key, value, collected_frames)
+            logger.log_metrics(metrics_to_log, collected_frames)
 
     collector.shutdown()
     if not test_env.is_closed:

sota-implementations/a2c/a2c_mujoco.py

Lines changed: 1 addition & 2 deletions
@@ -261,8 +261,7 @@ def update(batch):
         if logger:
             metrics_to_log.update(timeit.todict(prefix="time"))
             metrics_to_log["time/speed"] = pbar.format_dict["rate"]
-            for key, value in metrics_to_log.items():
-                logger.log_scalar(key, value, collected_frames)
+            logger.log_metrics(metrics_to_log, collected_frames)
 
     collector.shutdown()
     if not test_env.is_closed:

sota-implementations/cql/utils.py

Lines changed: 1 addition & 2 deletions
@@ -462,8 +462,7 @@ def make_continuous_cql_optimizer(cfg, loss_module):
 
 def log_metrics(logger, metrics, step):
     if logger is not None:
-        for metric_name, metric_value in metrics.items():
-            logger.log_scalar(metric_name, metric_value, step)
+        logger.log_metrics(metrics, step)
 
 
 def dump_video(module):

sota-implementations/crossq/utils.py

Lines changed: 1 addition & 2 deletions
@@ -305,8 +305,7 @@ def make_crossQ_optimizer(cfg, loss_module):
 
 
 def log_metrics(logger, metrics, step):
-    for metric_name, metric_value in metrics.items():
-        logger.log_scalar(metric_name, metric_value, step)
+    logger.log_metrics(metrics, step)
 
 
 def get_activation(activation: str):
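
All four scripts above switch from a per-key `log_scalar` loop to a single batched `logger.log_metrics` call. If you need to keep compatibility with a logger class that predates `log_metrics`, a small fallback helper like the sketch below (an assumption, not part of the commit) preserves the old behavior.

```python
def log_metrics(logger, metrics: dict, step: int) -> None:
    """Log a dict of scalars, preferring the batched API used in this commit."""
    if logger is None:
        return
    if hasattr(logger, "log_metrics"):
        # Single call covering the whole metrics dict.
        logger.log_metrics(metrics, step)
    else:
        # Fallback: the per-key loop this commit replaces.
        for name, value in metrics.items():
            logger.log_scalar(name, value, step)
```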
