Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ gymnasium==1.1.1
gymnasium[classic-control]==1.1.1
gymnasium[mujoco]==1.1.1
mujoco==3.2.6
envpool==0.8.4

mediapy==1.1.9
natsort==8.4.0
Expand Down
3 changes: 2 additions & 1 deletion scripts/base_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,9 @@ def __init__(
self.logger.info(
f"[SEED {self.train_seed} | {self.eval_seed}] Loading Environment: {self.env_config.gym}"
)

self.env, self.env_eval = self.env_factory.create_environment(
self.env_config, self.alg_config.image_observation
self.env_config, self.alg_config.image_observation, train_seed, eval_seed
)

# Set the seed for everything
Expand Down
Empty file.
90 changes: 90 additions & 0 deletions scripts/environments/atari/atari_environment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
from functools import cached_property

import cv2
import numpy as np
from environments.gym_environment import GymEnvironment
from gymnasium import spaces
from util.configurations import AtariConfig
import envpool


class AtariEnvironment(GymEnvironment):
    """Atari environment backed by envpool's gymnasium interface.

    Wraps a single (num_envs=1) envpool Atari environment and unbatches the
    vectorised outputs so the rest of the framework sees a single-env API.
    """

    def __init__(self, config: AtariConfig, seed: int, evaluation: bool) -> None:
        super().__init__(config)
        self.env = envpool.make_gymnasium(
            config.task,
            num_envs=1,
            seed=seed,
            img_width=config.frame_width,
            img_height=config.frame_height,
            # Standard Atari practice: episodic life and reward clipping are
            # training-time aids and must be DISABLED during evaluation.
            # (The previous code passed `evaluation` directly, inverting both.)
            episodic_life=not evaluation,
            reward_clip=not evaluation,
            stack_num=config.frames_to_stack,
        )
        # Always set the window name so render() never hits an AttributeError;
        # only create the actual window when display is enabled.
        self.name = f"{config.task}-{seed}"
        if config.display == 1:
            cv2.namedWindow(self.name, cv2.WINDOW_GUI_NORMAL)

        self.reset()

    @cached_property
    def max_action_value(self) -> float:
        # Atari uses Discrete action spaces, which have no `.high`; Box is
        # handled for completeness.
        if isinstance(self.env.action_space, spaces.Box):
            return self.env.action_space.high[0]
        if isinstance(self.env.action_space, spaces.Discrete):
            return self.env.action_space.n - 1
        raise ValueError(
            f"Unhandled action space type: {type(self.env.action_space)}"
        )

    @cached_property
    def min_action_value(self) -> float:
        # See max_action_value: Discrete spaces have no `.low`.
        if isinstance(self.env.action_space, spaces.Box):
            return self.env.action_space.low[0]
        if isinstance(self.env.action_space, spaces.Discrete):
            return 0
        raise ValueError(
            f"Unhandled action space type: {type(self.env.action_space)}"
        )

    @cached_property
    def observation_space(self) -> tuple:
        # envpool reports the already-stacked shape, e.g. (stack_num, H, W).
        return self.env.observation_space.shape

    @cached_property
    def action_num(self) -> int:
        if isinstance(self.env.action_space, spaces.Box):
            return self.env.action_space.shape[0]
        if isinstance(self.env.action_space, spaces.Discrete):
            return self.env.action_space.n
        raise ValueError(
            f"Unhandled action space type: {type(self.env.action_space)}"
        )

    def sample_action(self) -> np.ndarray:
        # Wrapped in a length-1 array because envpool expects batched actions.
        return np.array([self.env.action_space.sample()], dtype=int)

    def set_seed(self, seed: int) -> None:
        # NOTE(review): envpool fixes the environment's RNG seed at
        # construction time (see __init__); its reset() takes no seed here.
        # Only the action/observation spaces can be re-seeded after the fact
        # — confirm this satisfies the reproducibility requirements.
        self.env.reset()
        self.env.action_space.seed(seed)
        self.env.observation_space.seed(seed)

    def reset(self, training: bool = True) -> np.ndarray:
        state, _ = self.env.reset()
        # Unbatch: envpool returns observations with a leading env dimension.
        self.state = state[0]
        return self.state

    def _step(self, action: int) -> tuple:
        state, reward, terminated, truncated, info = self.env.step(action)
        self.state = state[0]
        # Unbatch every field; envpool's info dict is discarded here.
        return state[0], reward[0], terminated[0], truncated[0], {}

    def grab_frame(self, height: int = 232, width: int = 232) -> np.ndarray:
        # NOTE(review): with envpool frame stacking the observation is
        # (stack_num, H, W) for grayscale, so ndim is 3 and the RGB branch
        # below may be unreachable — confirm the RGB observation layout
        # before relying on it.
        if len(self.state.shape) == 4:
            # RGB: take the most recent frame's 3 channels, CHW -> HWC.
            frame = cv2.cvtColor(
                np.moveaxis(self.state[-3:], 0, -1), cv2.COLOR_RGB2BGR
            )
        else:
            # Grayscale: replicate the latest stacked frame across 3 channels.
            frame = self.state[-1]
            frame = np.stack([frame] * 3, axis=-1)
        return cv2.resize(frame, (width, height), interpolation=cv2.INTER_CUBIC)

    def render(self):
        # Safe even when display is disabled: self.name is always set in
        # __init__, and cv2.imshow creates a window on demand.
        frame = self.grab_frame()
        cv2.imshow(self.name, frame)
        cv2.waitKey(1)

    def get_overlay_info(self) -> dict:
        # TODO: Add overlay information for gyms as needed
        return {}
14 changes: 12 additions & 2 deletions scripts/environments/environment_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,11 @@ def __init__(self) -> None:
pass

def create_environment(
self, config: GymEnvironmentConfig, image_observation
self,
config: GymEnvironmentConfig,
image_observation,
train_seed: int,
eval_seed: int,
) -> tuple[
BaseEnvironment | MultiModalWrapper,
BaseEnvironment | MultiModalWrapper,
Expand Down Expand Up @@ -69,8 +73,14 @@ def create_environment(

env = SMAC2Environment(config, evaluation=False)
eval_env = SMAC2Environment(config, evaluation=True)
elif isinstance(config, cfg.AtariConfig):
from environments.atari.atari_environment import AtariEnvironment

env = AtariEnvironment(config, train_seed, evaluation=False)
eval_env = AtariEnvironment(config, eval_seed, evaluation=True)
image_observation = False
else:
raise ValueError(f"Unkown environment: {type(config)}")
raise ValueError(f"Unknown environment: {type(config)}")

if isinstance(env, GymEnvironment) and isinstance(eval_env, GymEnvironment):
env = MultiModalWrapper(config, env) if bool(image_observation) else env
Expand Down
7 changes: 6 additions & 1 deletion scripts/training_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,8 @@ def run_training(self) -> None:
state = self.env.reset()
episode_start = time.time()

train_info: dict = {}

# Main training loop
train_step_counter = self.start_training_step
for train_step_counter in range(
Expand Down Expand Up @@ -351,7 +353,10 @@ def run_training(self) -> None:
# total_reward += intrinsic_reward
# info["intrinsic_reward"] = intrinsic_reward

# entropy = self.agent.get_action_entropy(state)

# Store experience in memory
# self.memory.add(state, normalised_action, total_reward, next_state, done, entropy)
self.memory.add(state, normalised_action, total_reward, next_state, done)

state = next_state
Expand All @@ -370,7 +375,6 @@ def run_training(self) -> None:
episode_stats.get_episode_reward(),
episode_end,
)
info |= train_info

# Evaluate agent periodically
if (train_step_counter + 1) % self.number_steps_per_evaluation == 0:
Expand All @@ -382,6 +386,7 @@ def run_training(self) -> None:
# Handle episode completion
if episode_end:
episode_time = time.time() - episode_start
info |= train_info

info.update(episode_stats.summary())

Expand Down
5 changes: 5 additions & 0 deletions scripts/util/configurations.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ def model_dump(self, *args, **kwargs):
return data


class AtariConfig(GymEnvironmentConfig):
    # Configuration for envpool-backed Atari environments.
    # Identifier used by the environment factory to select AtariEnvironment.
    gym: ClassVar[str] = "atari"
    # Number of consecutive frames stacked into one observation
    # (passed to envpool as stack_num).
    frames_to_stack: int = 4


class OpenAIConfig(GymEnvironmentConfig):
gym: ClassVar[str] = "openai"

Expand Down