Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
925162e
add batch script (unfinished)
Oculux314 Nov 16, 2025
fd02050
correct config_templates
Oculux314 Nov 20, 2025
ad0a1a4
integrate batch code into main run.py
Oculux314 Nov 20, 2025
8e2413e
fix bugs in batch run logic
Oculux314 Nov 20, 2025
de475eb
implement a better way to name batch runs
Oculux314 Nov 20, 2025
6519801
allow skipping certain batch configurations
Oculux314 Nov 20, 2025
afa7ed8
start wk2 Fri tests
Oculux314 Nov 21, 2025
7d82ddd
improve batch logging/confirmation
Oculux314 Nov 25, 2025
6941a20
fix inverted _skip function for GoLU runs
Oculux314 Nov 25, 2025
9dd8289
add temporary code to allow skipping runs
Oculux314 Dec 1, 2025
d5df26b
add strip_logs script
Oculux314 Dec 5, 2025
775015a
switch to batch leaky relu and prelu
Oculux314 Dec 5, 2025
5e66e31
enable only running a range during batching
Oculux314 Dec 5, 2025
a16fa92
Auto-format code 🧹🌟🤖
Dec 5, 2025
e2b33f9
Merge branch 'main' into nwil508
Oculux314 Dec 5, 2025
67a34b2
fix batch range logic
Oculux314 Dec 5, 2025
440be54
switch to DMCS gym
Oculux314 Dec 9, 2025
88ab50b
add CARES base dockerfile
Oculux314 Dec 11, 2025
c54a9cd
fix bug where runs marked as skipped were desynced from runs which we…
Oculux314 Dec 13, 2025
8d0d9e6
tie batch range skipping logic to the coordinator object to make skip…
Oculux314 Dec 13, 2025
7c215fc
Auto-format code 🧹🌟🤖
Dec 13, 2025
83383e2
remove activation function-specific batch configs
Oculux314 Dec 18, 2025
4201a1e
switch dockerfile to use main
Oculux314 Dec 18, 2025
e0f39d1
Add how to use batch runs to README
Oculux314 Dec 19, 2025
aea0e4d
Auto-format code 🧹🌟🤖
Dec 19, 2025
3eacc0c
add docker section to readme
Oculux314 Dec 19, 2025
742edb4
ensure batch logic only runs when training
Oculux314 Dec 22, 2025
c298145
suppress spam repeated config printing in batch runs
Oculux314 Dec 22, 2025
860a7a3
Auto-format code 🧹🌟🤖
Dec 22, 2025
b5ff646
apply long-running cuda fix
Oculux314 Dec 29, 2025
de58819
change Dockerfile log path template
Oculux314 Dec 29, 2025
fd6fe4b
fix CARES_LOG_PATH_TEMPLATE to use run_name
Oculux314 Jan 8, 2026
1b3c1ef
add information about nvidia Docker bug
Oculux314 Jan 8, 2026
ad87d31
Merge remote-tracking branch 'origin/main' into nwil508
Oculux314 Jan 13, 2026
fefbc3e
remove unnecessary git branch changes and add welcome message
Feb 1, 2026
ac641ef
add CUDA version warning
Oculux314 Feb 10, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# docker build -t oculux314/cares:base . (use --no-cache to rebuild from start)
# docker run -it --gpus all oculux314/cares:base

# Container CUDA version needs to be less than or equal to the host CUDA version. If you get an error about CUDA version mismatch, try using an older image (e.g. nvidia/cuda:12.0.1-cudnn8-runtime-ubuntu22.04) and rebuilding the container.
FROM nvidia/cuda:12.1.1-cudnn8-runtime-ubuntu22.04
# Use OSMesa (off-screen software) rendering for MuJoCo - the container has no display server.
ENV MUJOCO_GL=osmesa
# All training logs are written under this directory inside the container.
ENV CARES_LOG_BASE_DIR=/app/cares_rl_logs
WORKDIR /app

# -------------------------------------------------------------------
# Installation
# -------------------------------------------------------------------

RUN apt-get update && apt-get install -y \
python-is-python3 \
python3-venv \
python3-pip \
git \
# This is needed for mujoco
libgl1 \
libglib2.0-0 \
libsm6 \
libxext6 \
libxrender1 \
libosmesa6 \
libosmesa6-dev \
mesa-utils \
libgl1-mesa-glx \
libgl1-mesa-dri \
nano

# -------------------------------------------------------------------
# Clone repos
# -------------------------------------------------------------------

# gymnasium_envrionments - training engine and core environments
RUN git clone https://github.com/UoA-CARES/gymnasium_envrionments.git

# cares_reinforcement_learning - RL algorithms
RUN git clone https://github.com/UoA-CARES/cares_reinforcement_learning.git

# -------------------------------------------------------------------
# Setup cares_reinforcement_learning
# -------------------------------------------------------------------

WORKDIR /app/cares_reinforcement_learning
RUN pip install -r requirements.txt
RUN pip install -e .

# -------------------------------------------------------------------
# Setup gymnasium_envrionments
# -------------------------------------------------------------------

WORKDIR /app/gymnasium_envrionments
RUN pip install -r requirements.txt

# -------------------------------------------------------------------
# Runtime
# -------------------------------------------------------------------

# NOTE(review): run_name and algorithm are concatenated with no separator here,
# producing e.g. "SAC/<run_name>SAC-<date>" - confirm run_name always ends with
# a delimiter, or whether a "/" or "-" is missing between the placeholders.
ENV CARES_LOG_PATH_TEMPLATE="{algorithm}/{run_name}{algorithm}-{date}"
WORKDIR /app/gymnasium_envrionments/scripts
CMD ["bash", "-c", "echo '======================================================================\nRun `python run.py train cli --gym openai --task HalfCheetah-v4 SAC` to start a training run.\n======================================================================' && \
bash"]
49 changes: 49 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -156,3 +156,52 @@ Plot and compare the results of two or more training instances
```sh
python3 plotter.py -s ~/cares_rl_logs -d ~/cares_rl_logs/ALGORITHM_A/ALGORITHM_A-TASK-YY_MM_DD:HH:MM:SS ~/cares_rl_logs/ALGORITHM_B/ALGORITHM_B-TASK-YY_MM_DD:HH:MM:SS
```

# Running in Batch Mode
A set of different training instances (e.g. comparing different algorithms or environments) can be run in series using batch mode. This is compatible with running seeds in parallel.

To use batch mode, append `--batch 1` to any train command, e.g.
```
python run.py train cli --gym dmcs --domain humanoid --task walk TD3 --batch 1
```

The specific instances to run can be configured in the `BATCH CONFIG` section of `scripts/batch_coordinator.py`. The cross product of these lists is used to create the set of instances to be run.
<p align="center">
<img src="./media/batch-config.png" style="width: 80%;"/>
</p>

The format is `field: [instances]` and mirrors the configuration object used in non-batched runs. It can be useful to set a breakpoint in `run.py` to view the configuration object when editing this file.
<p align="center">
<img src="./media/config-breakpoint.png" style="width: 80%;"/>
</p>

The `_skip()` function in `scripts/batch_coordinator.py` can be used to filter out undesired combinations - e.g. here, invalid domain-task pairings are skipped.
<p align="center">
<img src="./media/skip-function.png" style="width: 80%;"/>
</p>

Finally, a specific range of instances can be run by specifying `--b_start` and/or `--b_end`. The run order is deterministic.
```
python run.py train cli --gym dmcs --domain humanoid --task walk TD3 --batch 1 --b_start 2 --b_end -2
```
<p align="center">
<img src="./media/batch-range.png" style="width: 80%;"/>
</p>

# Using Docker
This repository can be run in a docker container using `docker run -it --gpus all oculux314/cares:base`. This will download an image of this repository, start it, and open up a bash terminal inside to run commands as usual. The `gymnasium_envrionments`, `cares_reinforcement_learning`, and `cares_rl_logs` folders are located in the `/app` directory within the container.

To open another terminal inside the same running docker container use `docker ps -a` to find the name of the container, then `docker exec -it <container> bash`. To copy files out of the docker container, use `docker cp <container>:<path> <host-path>` from the host.

In some situations, you may want to build your own version of the image (e.g. to modify some build steps). To do this run `docker build -t oculux314/cares:base .` from the root of this repository, overwriting any existing image, and then run the image as usual.

### Failed to initialize NVML: Unknown Error

There is a known bug where long-running Docker containers lose their nvidia session. If you see `Failed to initialize NVML: Unknown Error` (or similar), restart the docker container and resume training.

```
docker ps
docker stop <container>
docker start <container>
docker exec -it <container> bash
```
Binary file added media/batch-config.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added media/batch-range.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added media/config-breakpoint.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added media/skip-function.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
163 changes: 163 additions & 0 deletions scripts/batch_coordinator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
"""
This module enables batch execution of multiple reinforcement learning experiments
with varying configurations. It constructs ExecutionCoordinator instances for each
configuration combination and prepares them for execution.
"""

import itertools
from typing import Any

from execution_coordinator import ExecutionCoordinator
import execution_logger as logs
from util.rl_parser import RLParser

# MARK: BATCH CONFIG
# Configure batch parameters here. The cross-product of these lists will be used
# to create multiple experiment configurations.

# Example: 4 domains x 4 tasks = 16 combinations; the _skip function below
# filters out the 12 invalid domain/task pairings, leaving 4 experiments.
# Keys are dotted attribute paths resolved on the ExecutionCoordinator
# (e.g. "env_config.domain" sets coordinator.env_config.domain).
batch_config: dict[str, list[Any | tuple[Any, str]]] = {
    "env_config.domain": ["cheetah", "cartpole", "finger", "walker"],
    "env_config.task": [
        "run",
        "swingup",
        "spin",
        # Entries may also be (value, name) tuples - useful when value is an
        # object; the name is used when building the run name.
        ("walk", "CUSTOM NAME"),
    ],
}


# This function can be customized to skip certain invalid or undesired configurations.
def _skip(config: dict[str, tuple[Any, str]]) -> bool:
"""Determine if a given configuration combination should be skipped.
E.g., task walker.catch doesn't exist.

Args:
config (dict[str, tuple[Any, str]]): A configuration mapping where each
key is a configuration attribute path and each value is a tuple of
(actual_value, name).
Returns:
bool: True if the configuration should be skipped, False otherwise.
"""
# Example: match domain to task
return not (
(
config.get("env_config.domain", (None,))[0] == "cartpole"
and config.get("env_config.task", (None,))[0] == "swingup"
)
or (
config.get("env_config.domain", (None,))[0] == "finger"
and config.get("env_config.task", (None,))[0] == "spin"
)
or (
config.get("env_config.domain", (None,))[0] == "cheetah"
and config.get("env_config.task", (None,))[0] == "run"
)
or (
config.get("env_config.domain", (None,))[0] == "walker"
and config.get("env_config.task", (None,))[0] == "walk"
)
)


# -------------------------------------------------------------------
# MARK: INTERNALS
# -------------------------------------------------------------------

# Module-level logger shared by the batch coordination helpers in this file
logger = logs.get_main_logger()


def get_batch_coordinators() -> list[tuple[ExecutionCoordinator, str]]:
    """Create coordinators for every combination in batch_config.

    The batch_config maps attribute paths (possibly dotted, e.g. "alg_config.actor_lr")
    to lists of values. This function expands the Cartesian product of those lists,
    creates an ExecutionCoordinator for each combination, applies the configuration
    values to the coordinator, and returns a list of (coordinator, run_name) tuples.

    Returns:
        list[tuple[ExecutionCoordinator, str]]: A list where each tuple contains a
            configured ExecutionCoordinator and a human-readable run name generated
            by _get_name_from_config. Empty if every combination was skipped.

    Notes:
        - This function only constructs and names coordinators; it does not start or run them.
        - Keys in batch_config are resolved via attribute access on the coordinator.
    """
    # Expand batch configs into all combinations
    keys = list(batch_config.keys())
    configs = [
        _create_config(keys, config_values)
        for config_values in itertools.product(*batch_config.values())
    ]

    coordinators: list[tuple[ExecutionCoordinator, str]] = []
    index = 0
    for config in configs:
        # Certain combinations may be invalid - skip these.
        # These are DIFFERENT from the [b_start, b_end] range filtering and
        # don't show up at all (they get no index and no coordinator).
        if _skip(config):
            continue
        index += 1

        # _config_to_coordinator already applies the configuration values to the
        # coordinator, so no separate _replace_configurations call is needed here.
        coordinator = _config_to_coordinator(config)
        run_name = _get_name_from_config(config, index)
        coordinator.env_config.index = index  # 1-based index for range filtering
        coordinators.append((coordinator, run_name))

    # Guard against an empty batch (every combination skipped); without this the
    # coordinators[0] access below would raise IndexError.
    if not coordinators:
        return coordinators

    # Support negative indexing (i.e. replace -1 with last index)
    num_coordinators = len(coordinators)
    b_start = coordinators[0][0].env_config.b_start
    b_end = coordinators[0][0].env_config.b_end
    if b_start < 0:
        b_start = num_coordinators + b_start + 1
    if b_end < 0:
        b_end = num_coordinators + b_end + 1
    # Broadcast the normalized range to every coordinator so they all agree.
    for coordinator, _ in coordinators:
        coordinator.env_config.b_start = b_start
        coordinator.env_config.b_end = b_end

    return coordinators


def _create_config(
keys: list[str], config_values: tuple[Any | tuple[Any, str], ...]
) -> dict[str, tuple[Any, str]]:
config: dict[str, tuple[Any, str]] = {}
for i, value in enumerate(config_values):
# Ensure value is a tuple (actual_value, name)
if isinstance(value, tuple):
config[keys[i]] = value
else:
config[keys[i]] = (value, f"{keys[i]}-{value}")
return config


def _get_name_from_config(config: dict[str, tuple[Any, str]], index: int) -> str:
name_parts = []
for value in config.values():
name_parts.append(value[1])
return f"[{index}]_" + "_".join(name_parts)


def _config_to_coordinator(config: dict[str, tuple[Any, str]]) -> ExecutionCoordinator:
    """Build an ExecutionCoordinator from the CLI args and apply *config* overrides.

    Config printing is suppressed ("noprint") so batch runs don't dump one full
    configuration printout per combination.
    """
    base_configs = RLParser().parse_args()
    coordinator = ExecutionCoordinator(base_configs, options={"noprint": True})
    _replace_configurations(coordinator, config)
    return coordinator


def _replace_configurations(
coordinator: ExecutionCoordinator, config: dict[str, tuple[Any, str]]
):
for key, value in config.items():
keys = key.split(".")
obj = coordinator
for k in keys[:-1]:
obj = getattr(obj, k)
setattr(obj, keys[-1], value[0]) # value is a tuple (actual_value, name)
11 changes: 9 additions & 2 deletions scripts/execution_coordinator.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import logging
import multiprocessing
import time
import gc
import torch
from multiprocessing.queues import Queue
from queue import Empty
from typing import Any
Expand Down Expand Up @@ -41,7 +43,7 @@ class ExecutionCoordinator:
- Testing: Testing final models only with specified episodes
"""

def __init__(self, configurations: dict[str, Any]):
def __init__(self, configurations: dict[str, Any], options: dict[str, Any] = {}):
"""
Initialize the ExecutionCoordinator with parsed configurations.

Expand All @@ -63,7 +65,8 @@ def __init__(self, configurations: dict[str, Any]):
self.base_log_dir: str | None = None

# Log all configurations for debugging
self._print_configurations()
if not options.get("noprint", False):
self._print_configurations()

def _print_configurations(self) -> None:
"""Log all configurations for debugging and reproducibility."""
Expand Down Expand Up @@ -486,3 +489,7 @@ def run(self) -> None:
self._test()
else:
raise ValueError(f"Unknown command: {self.run_config.command}")

# Clean up resources after execution
gc.collect()
torch.cuda.empty_cache()
Loading
Loading