Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
925162e
add batch script (unfinished)
Oculux314 Nov 16, 2025
fd02050
correct config_templates
Oculux314 Nov 20, 2025
ad0a1a4
integrate batch code into main run.py
Oculux314 Nov 20, 2025
8e2413e
fix bugs in batch run logic
Oculux314 Nov 20, 2025
de475eb
implement a better way to name batch runs
Oculux314 Nov 20, 2025
6519801
allow skipping certain batch configurations
Oculux314 Nov 20, 2025
afa7ed8
start wk2 Fri tests
Oculux314 Nov 21, 2025
7d82ddd
improve batch logging/confirmation
Oculux314 Nov 25, 2025
6941a20
fix inverted _skip function for GoLU runs
Oculux314 Nov 25, 2025
9dd8289
add temporary code to allow skipping runs
Oculux314 Dec 1, 2025
d5df26b
add strip_logs script
Oculux314 Dec 5, 2025
775015a
switch to batch leaky relu and prelu
Oculux314 Dec 5, 2025
5e66e31
enable only running a range during batching
Oculux314 Dec 5, 2025
a16fa92
Auto-format code 🧹🌟🤖
Dec 5, 2025
e2b33f9
Merge branch 'main' into nwil508
Oculux314 Dec 5, 2025
67a34b2
fix batch range logic
Oculux314 Dec 5, 2025
440be54
switch to DMCS gym
Oculux314 Dec 9, 2025
88ab50b
add CARES base dockerfile
Oculux314 Dec 11, 2025
c54a9cd
fix bug where runs marked as skipped were desynced from runs which we…
Oculux314 Dec 13, 2025
8d0d9e6
tie batch range skipping logic to the coordinator object to make skip…
Oculux314 Dec 13, 2025
7c215fc
Auto-format code 🧹🌟🤖
Dec 13, 2025
83383e2
remove activation function-specific batch configs
Oculux314 Dec 18, 2025
4201a1e
switch dockerfile to use main
Oculux314 Dec 18, 2025
e0f39d1
Add how to use batch runs to README
Oculux314 Dec 19, 2025
aea0e4d
Auto-format code 🧹🌟🤖
Dec 19, 2025
3eacc0c
add docker section to readme
Oculux314 Dec 19, 2025
742edb4
ensure batch logic only runs when training
Oculux314 Dec 22, 2025
c298145
suppress spam repeated config printing in batch runs
Oculux314 Dec 22, 2025
860a7a3
Auto-format code 🧹🌟🤖
Dec 22, 2025
b5ff646
apply long-running cuda fix
Oculux314 Dec 29, 2025
de58819
change Dockerfile log path template
Oculux314 Dec 29, 2025
fd6fe4b
fix CARES_LOG_PATH_TEMPLATE to use run_name
Oculux314 Jan 8, 2026
1b3c1ef
add information about nvidia Docker bug
Oculux314 Jan 8, 2026
ad87d31
Merge remote-tracking branch 'origin/main' into nwil508
Oculux314 Jan 13, 2026
fefbc3e
remove unnecessary git branch changes and add welcome message
Feb 1, 2026
ac641ef
add CUDA version warning
Oculux314 Feb 10, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# docker build -t oculux314/cares:base . (use --no-cache to rebuild from start)
# docker run -it --gpus all oculux314/cares:base

# Container CUDA version needs to be less than or equal to the host CUDA version. If you get an error about CUDA version mismatch, try using an older image (e.g. nvidia/cuda:12.0.1-cudnn8-runtime-ubuntu22.04) and rebuilding the container.
FROM nvidia/cuda:12.1.1-cudnn8-runtime-ubuntu22.04
# Use OSMesa (off-screen software) rendering for MuJoCo - the container has no display server.
ENV MUJOCO_GL=osmesa
# All training logs are written under this directory inside the container.
ENV CARES_LOG_BASE_DIR=/app/cares_rl_logs
WORKDIR /app

# -------------------------------------------------------------------
# Installation
# -------------------------------------------------------------------

RUN apt-get update && apt-get install -y \
python-is-python3 \
python3-venv \
python3-pip \
git \
# This is needed for mujoco
libgl1 \
libglib2.0-0 \
libsm6 \
libxext6 \
libxrender1 \
libosmesa6 \
libosmesa6-dev \
mesa-utils \
libgl1-mesa-glx \
libgl1-mesa-dri \
nano

# -------------------------------------------------------------------
# Clone repos
# -------------------------------------------------------------------

# gymnasium_envrionments - training engine and core environments
RUN git clone https://github.com/UoA-CARES/gymnasium_envrionments.git

# cares_reinforcement_learning - RL algorithms
RUN git clone https://github.com/UoA-CARES/cares_reinforcement_learning.git

# -------------------------------------------------------------------
# Setup cares_reinforcement_learning
# -------------------------------------------------------------------

WORKDIR /app/cares_reinforcement_learning
RUN pip install -r requirements.txt
RUN pip install -e .

# -------------------------------------------------------------------
# Setup gymnasium_envrionments
# -------------------------------------------------------------------

WORKDIR /app/gymnasium_envrionments
RUN pip install -r requirements.txt

# -------------------------------------------------------------------
# Runtime
# -------------------------------------------------------------------

# NOTE(review): run_name and algorithm are concatenated with no separator here,
# producing e.g. "SAC/<run_name>SAC-<date>" - confirm run_name always ends with
# a delimiter, or whether a "/" or "-" is missing between the placeholders.
ENV CARES_LOG_PATH_TEMPLATE="{algorithm}/{run_name}{algorithm}-{date}"
WORKDIR /app/gymnasium_envrionments/scripts
CMD ["bash", "-c", "echo '======================================================================\nRun `python run.py train cli --gym openai --task HalfCheetah-v4 SAC` to start a training run.\n======================================================================' && \
bash"]
49 changes: 49 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -156,3 +156,52 @@ Plot and compare the results of two or more training instances
```sh
python3 plotter.py -s ~/cares_rl_logs -d ~/cares_rl_logs/ALGORITHM_A/ALGORITHM_A-TASK-YY_MM_DD:HH:MM:SS ~/cares_rl_logs/ALGORITHM_B/ALGORITHM_B-TASK-YY_MM_DD:HH:MM:SS
```

# Running in Batch Mode
A set of different training instances (e.g. comparing different algorithms or environments) can be run in series using batch mode. This is compatible with running seeds in parallel.

To use batch mode, append `--batch 1` to any train command, e.g.
```
python run.py train cli --gym dmcs --domain humanoid --task walk TD3 --batch 1
```

The specific instances to run can be configured in the `BATCH CONFIG` section of `scripts/batch_coordinator.py`. The cross product of these lists is used to create the set of instances to be run.
<p align="center">
<img src="./media/batch-config.png" style="width: 80%;"/>
</p>

The format is `field: [instances]` and mirrors the configuration object used in non-batched runs. It can be useful to set a breakpoint in `run.py` to view the configuration object when editing this file.
<p align="center">
<img src="./media/config-breakpoint.png" style="width: 80%;"/>
</p>

The `_skip()` function in `scripts/batch_coordinator.py` can be used to filter out undesired combinations - e.g. here, invalid domain-task pairings are skipped.
<p align="center">
<img src="./media/skip-function.png" style="width: 80%;"/>
</p>

Finally, a specific range of instances can be run by specifying `--b_start` and/or `--b_end`. The run order is deterministic.
```
python run.py train cli --gym dmcs --domain humanoid --task walk TD3 --batch 1 --b_start 2 --b_end -2
```
<p align="center">
<img src="./media/batch-range.png" style="width: 80%;"/>
</p>

# Using Docker
This repository can be run in a docker container using `docker run -it --gpus all oculux314/cares:base`. This will download an image of this repository, start it, and open up a bash terminal inside to run commands as usual. The `gymnasium_envrionments`, `cares_reinforcement_learning`, and `cares_rl_logs` folders are located in the `/app` directory within the container.

To open another terminal inside the same running docker container use `docker ps -a` to find the name of the container, then `docker exec -it <container> bash`. To copy files out of the docker container, use `docker cp <container>:<path> <host-path>` from the host.

In some situations, you may want to build your own version of the image (e.g. to modify some build steps). To do this run `docker build -t oculux314/cares:base .` from the root of this repository, overwriting any existing image, and then run the image as usual.

### Failed to initialize NVML: Unknown Error

There is a known bug where long-running Docker containers lose their nvidia session. If you see `Failed to initialize NVML: Unknown Error` (or similar), restart the docker container and resume training.

```
docker ps
docker stop <container>
docker start <container>
docker exec -it <container> bash
```
Binary file added media/batch-config.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added media/batch-range.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added media/config-breakpoint.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added media/skip-function.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
163 changes: 163 additions & 0 deletions scripts/batch_coordinator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
"""
This module enables batch execution of multiple reinforcement learning experiments
with varying configurations. It constructs ExecutionCoordinator instances for each
configuration combination and prepares them for execution.
"""

import itertools
from typing import Any

from execution_coordinator import ExecutionCoordinator
import execution_logger as logs
from util.rl_parser import RLParser

# MARK: BATCH CONFIG
# Configure batch parameters here. The cross-product of these lists will be used
# to create multiple experiment configurations.

# Example: 4 domains x 4 tasks = 16 combinations; the _skip function below
# filters out the 12 invalid domain/task pairings, leaving 4 experiments.
# Keys are dotted attribute paths resolved on the ExecutionCoordinator
# (e.g. "env_config.domain" sets coordinator.env_config.domain).
batch_config: dict[str, list[Any | tuple[Any, str]]] = {
    "env_config.domain": ["cheetah", "cartpole", "finger", "walker"],
    "env_config.task": [
        "run",
        "swingup",
        "spin",
        # Entries may also be (value, name) tuples - useful when value is an
        # object; the name is used when building the run name.
        ("walk", "CUSTOM NAME"),
    ],
}


# This function can be customized to skip certain invalid or undesired configurations.
def _skip(config: dict[str, tuple[Any, str]]) -> bool:
"""Determine if a given configuration combination should be skipped.
E.g., task walker.catch doesn't exist.

Args:
config (dict[str, tuple[Any, str]]): A configuration mapping where each
key is a configuration attribute path and each value is a tuple of
(actual_value, name).
Returns:
bool: True if the configuration should be skipped, False otherwise.
"""
# Example: match domain to task
return not (
(
config.get("env_config.domain", (None,))[0] == "cartpole"
and config.get("env_config.task", (None,))[0] == "swingup"
)
or (
config.get("env_config.domain", (None,))[0] == "finger"
and config.get("env_config.task", (None,))[0] == "spin"
)
or (
config.get("env_config.domain", (None,))[0] == "cheetah"
and config.get("env_config.task", (None,))[0] == "run"
)
or (
config.get("env_config.domain", (None,))[0] == "walker"
and config.get("env_config.task", (None,))[0] == "walk"
)
)


# -------------------------------------------------------------------
# MARK: INTERNALS
# -------------------------------------------------------------------

# Module-level logger shared by the batch coordination helpers in this file
logger = logs.get_main_logger()


def get_batch_coordinators() -> list[tuple[ExecutionCoordinator, str]]:
    """Create coordinators for every combination in batch_config.

    The batch_config maps attribute paths (possibly dotted, e.g. "alg_config.actor_lr")
    to lists of values. This function expands the Cartesian product of those lists,
    creates an ExecutionCoordinator for each combination, applies the configuration
    values to the coordinator, and returns a list of (coordinator, run_name) tuples.

    Returns:
        list[tuple[ExecutionCoordinator, str]]: A list where each tuple contains a
            configured ExecutionCoordinator and a human-readable run name generated
            by _get_name_from_config. Empty if every combination was skipped.

    Notes:
        - This function only constructs and names coordinators; it does not start or run them.
        - Keys in batch_config are resolved via attribute access on the coordinator.
    """
    # Expand batch configs into all combinations
    keys = list(batch_config.keys())
    configs = [
        _create_config(keys, config_values)
        for config_values in itertools.product(*batch_config.values())
    ]

    coordinators: list[tuple[ExecutionCoordinator, str]] = []
    index = 0
    for config in configs:
        # Certain combinations may be invalid - skip these.
        # These are DIFFERENT from the [b_start, b_end] range filtering and
        # don't show up at all (they get no index and no coordinator).
        if _skip(config):
            continue
        index += 1

        # _config_to_coordinator already applies the configuration values to the
        # coordinator, so no separate _replace_configurations call is needed here.
        coordinator = _config_to_coordinator(config)
        run_name = _get_name_from_config(config, index)
        coordinator.env_config.index = index  # 1-based index for range filtering
        coordinators.append((coordinator, run_name))

    # Guard against an empty batch (every combination skipped); without this the
    # coordinators[0] access below would raise IndexError.
    if not coordinators:
        return coordinators

    # Support negative indexing (i.e. replace -1 with last index)
    num_coordinators = len(coordinators)
    b_start = coordinators[0][0].env_config.b_start
    b_end = coordinators[0][0].env_config.b_end
    if b_start < 0:
        b_start = num_coordinators + b_start + 1
    if b_end < 0:
        b_end = num_coordinators + b_end + 1
    # Broadcast the normalized range to every coordinator so they all agree.
    for coordinator, _ in coordinators:
        coordinator.env_config.b_start = b_start
        coordinator.env_config.b_end = b_end

    return coordinators


def _create_config(
keys: list[str], config_values: tuple[Any | tuple[Any, str], ...]
) -> dict[str, tuple[Any, str]]:
config: dict[str, tuple[Any, str]] = {}
for i, value in enumerate(config_values):
# Ensure value is a tuple (actual_value, name)
if isinstance(value, tuple):
config[keys[i]] = value
else:
config[keys[i]] = (value, f"{keys[i]}-{value}")
return config


def _get_name_from_config(config: dict[str, tuple[Any, str]], index: int) -> str:
name_parts = []
for value in config.values():
name_parts.append(value[1])
return f"[{index}]_" + "_".join(name_parts)


def _config_to_coordinator(config: dict[str, tuple[Any, str]]) -> ExecutionCoordinator:
    """Build an ExecutionCoordinator from the CLI args and apply *config* overrides.

    Config printing is suppressed ("noprint") so batch runs don't dump one full
    configuration printout per combination.
    """
    base_configs = RLParser().parse_args()
    coordinator = ExecutionCoordinator(base_configs, options={"noprint": True})
    _replace_configurations(coordinator, config)
    return coordinator


def _replace_configurations(
coordinator: ExecutionCoordinator, config: dict[str, tuple[Any, str]]
):
for key, value in config.items():
keys = key.split(".")
obj = coordinator
for k in keys[:-1]:
obj = getattr(obj, k)
setattr(obj, keys[-1], value[0]) # value is a tuple (actual_value, name)
11 changes: 9 additions & 2 deletions scripts/execution_coordinator.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import logging
import multiprocessing
import time
import gc
import torch
from multiprocessing.queues import Queue
from queue import Empty
from typing import Any
Expand Down Expand Up @@ -41,7 +43,7 @@ class ExecutionCoordinator:
- Testing: Testing final models only with specified episodes
"""

def __init__(self, configurations: dict[str, Any]):
def __init__(self, configurations: dict[str, Any], options: dict[str, Any] = {}):
"""
Initialize the ExecutionCoordinator with parsed configurations.

Expand All @@ -63,7 +65,8 @@ def __init__(self, configurations: dict[str, Any]):
self.base_log_dir: str | None = None

# Log all configurations for debugging
self._print_configurations()
if not options.get("noprint", False):
self._print_configurations()

def _print_configurations(self) -> None:
"""Log all configurations for debugging and reproducibility."""
Expand Down Expand Up @@ -486,3 +489,7 @@ def run(self) -> None:
self._test()
else:
raise ValueError(f"Unknown command: {self.run_config.command}")

# Clean up resources after execution
gc.collect()
torch.cuda.empty_cache()
Loading
Loading