From 6cac12bff4ee622caf536db30c9a8722d9d45a8f Mon Sep 17 00:00:00 2001
From: Manuel Santana Castolo
Date: Wed, 1 Oct 2025 21:36:46 +0000
Subject: [PATCH 1/2] Add Accelerator API to Mario RL tutorial

- Integrate the Accelerator API to support multiple accelerators.

Co-authored-by: dggaytan
---
 intermediate_source/mario_rl_tutorial.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/intermediate_source/mario_rl_tutorial.py b/intermediate_source/mario_rl_tutorial.py
index 03d6396a47e..72f0b8f1294 100755
--- a/intermediate_source/mario_rl_tutorial.py
+++ b/intermediate_source/mario_rl_tutorial.py
@@ -34,6 +34,9 @@
 # pip install gym-super-mario-bros==7.4.0
 # pip install tensordict==0.3.0
 # pip install torchrl==0.3.0
+# pip install gymnasium
+# pip install nes-py
+# pip install gym-super-mario-bros
 #
 
 import torch
@@ -101,9 +104,9 @@
 
 # Initialize Super Mario environment (in v0.26 change render mode to 'human' to see results on the screen)
 if gym.__version__ < '0.26':
-    env = gym_super_mario_bros.make("SuperMarioBros-1-1-v0", new_step_api=True)
+    env = gym_super_mario_bros.make("SuperMarioBros-1-1-v3", new_step_api=True)
 else:
-    env = gym_super_mario_bros.make("SuperMarioBros-1-1-v0", render_mode='rgb', apply_api_compatibility=True)
+    env = gym_super_mario_bros.make("SuperMarioBros-1-1-v3", render_mode='rgb', apply_api_compatibility=True)
 
 # Limit the action-space to
 #   0. walk right
@@ -292,7 +295,7 @@ def __init__(self, state_dim, action_dim, save_dir):
         self.action_dim = action_dim
         self.save_dir = save_dir
 
-        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        self.device = "xpu" if torch.xpu.is_available() else "cpu"
 
         # Mario's DNN to predict the most optimal action - we implement this in the Learn section
         self.net = MarioNet(self.state_dim, self.action_dim).float()
@@ -735,8 +738,8 @@ def record(self, episode, epsilon, step):
 # In this example we run the training loop for 40 episodes, but for Mario to truly learn the ways of
 # his world, we suggest running the loop for at least 40,000 episodes!
 #
-use_cuda = torch.cuda.is_available()
-print(f"Using CUDA: {use_cuda}")
+use_accel = torch.xpu.is_available()
+print(f"Using xpu: {use_accel}")
 print()
 
 save_dir = Path("checkpoints") / datetime.datetime.now().strftime("%Y-%m-%dT%H-%M-%S")

From 9bde31b9004649ef65577472608ef19612d4d464 Mon Sep 17 00:00:00 2001
From: Edgar Romo Montiel
Date: Tue, 7 Oct 2025 16:49:15 -0700
Subject: [PATCH 2/2] Add patches for NumPy 2.x compatibility and fix Mario x_position overflow

Update accelerator usage for device selection.

Signed-off-by: Edgar Romo Montiel
---
 intermediate_source/mario_rl_tutorial.py | 50 +++++++++++++++++++----
 1 file changed, 40 insertions(+), 10 deletions(-)

diff --git a/intermediate_source/mario_rl_tutorial.py b/intermediate_source/mario_rl_tutorial.py
index 72f0b8f1294..aaed8ee6908 100755
--- a/intermediate_source/mario_rl_tutorial.py
+++ b/intermediate_source/mario_rl_tutorial.py
@@ -32,13 +32,24 @@
 #
 # %%bash
 # pip install gym-super-mario-bros==7.4.0
-# pip install tensordict==0.3.0
-# pip install torchrl==0.3.0
-# pip install gymnasium
-# pip install nes-py
-# pip install gym-super-mario-bros
+# pip install 'tensordict>=0.3.0'
+# pip install 'torchrl>=0.3.0'
+# pip install matplotlib
 #
 
+######### Patch for NumPy 2.x: cast nes-py ROM sizes to Python int to avoid overflow
+import nes_py._rom
+
+def patched_prg_rom_stop(self):
+    return self.prg_rom_start + int(self.prg_rom_size) * 2**10
+
+def patched_chr_rom_stop(self):
+    return self.chr_rom_start + int(self.chr_rom_size) * 2**10
+
+nes_py._rom.ROM.prg_rom_stop = property(patched_prg_rom_stop)
+nes_py._rom.ROM.chr_rom_stop = property(patched_chr_rom_stop)
+########################################################################
+
 import torch
 from torch import nn
 from torchvision import transforms as T
@@ -87,7 +98,6 @@
 # the action in a state. We try to approximate this function.
 #
 
-
 ######################################################################
 # Environment
 # """"""""""""""""
@@ -102,6 +112,21 @@
 # (next) state, reward and other info.
 #
 
+
+################### Patch for NumPy 2.x: add np.bool8 alias if missing
+if not hasattr(np, "bool8"):
+    np.bool8 = np.bool_
+
+
+################### Patch the _x_position property to cast RAM values to int
+def patched_x_position(self):
+    # Cast to int to avoid numpy uint8 overflow
+    return int(self.ram[0x6d]) * 0x100 + int(self.ram[0x86])
+
+gym_super_mario_bros.smb_env.SuperMarioBrosEnv._x_position = property(patched_x_position)
+
+#######################################################################################
+
 # Initialize Super Mario environment (in v0.26 change render mode to 'human' to see results on the screen)
 if gym.__version__ < '0.26':
     env = gym_super_mario_bros.make("SuperMarioBros-1-1-v3", new_step_api=True)
@@ -295,7 +320,11 @@ def __init__(self, state_dim, action_dim, save_dir):
         self.action_dim = action_dim
         self.save_dir = save_dir
 
-        self.device = "xpu" if torch.xpu.is_available() else "cpu"
+        # Use the available accelerator (e.g. cuda, xpu, mps); fall back to CPU
+        accelerator = torch.accelerator.current_accelerator()
+        if accelerator is None:
+            accelerator = "cpu"
+        self.device = accelerator
 
         # Mario's DNN to predict the most optimal action - we implement this in the Learn section
         self.net = MarioNet(self.state_dim, self.action_dim).float()
@@ -738,9 +767,10 @@ def record(self, episode, epsilon, step):
 # In this example we run the training loop for 40 episodes, but for Mario to truly learn the ways of
 # his world, we suggest running the loop for at least 40,000 episodes!
 #
-use_accel = torch.xpu.is_available()
-print(f"Using xpu: {use_accel}")
-print()
+device = torch.accelerator.current_accelerator()
+if device is None:
+    device = "cpu"
+print(f"Using device: {device}\n")
 
 save_dir = Path("checkpoints") / datetime.datetime.now().strftime("%Y-%m-%dT%H-%M-%S")
 save_dir.mkdir(parents=True)
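
Reviewer note (illustration, not part of the patch series): a minimal
standalone sketch of the NumPy 2.x failure mode that the nes-py ROM-size
and _x_position patches above work around. The byte values are invented
for the example.

    import numpy as np

    # nes-py and gym-super-mario-bros read emulator RAM/header bytes as
    # numpy uint8 scalars.
    hi, lo = np.uint8(0x01), np.uint8(0x90)

    # Under NumPy 2.x (NEP 50), combining a uint8 with a Python int that
    # does not fit in uint8 raises OverflowError instead of promoting:
    try:
        x = hi * 0x100 + lo
    except OverflowError as err:
        print(f"NumPy 2.x: {err}")

    # Casting to Python int first, as the patches do, keeps the math exact:
    x = int(hi) * 0x100 + int(lo)
    print(x)  # 400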
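
Reviewer note (illustration, not part of the patch series): the
device-selection pattern both accelerator hunks adopt, as a self-contained
sketch. Assumes PyTorch >= 2.6, where the torch.accelerator module is
available; the tensor and layer below are only illustrative.

    import torch

    # current_accelerator() returns the accelerator the build supports
    # (cuda, xpu, mps, ...) as a torch.device, or None on CPU-only builds.
    device = torch.accelerator.current_accelerator()
    if device is None:
        device = torch.device("cpu")
    print(f"Using device: {device}")

    # Tensors and modules are then placed on it explicitly; the shapes
    # mirror the tutorial's 4-frame stack of 84x84 observations.
    obs = torch.randn(1, 4, 84, 84, device=device)
    net = torch.nn.Conv2d(4, 32, kernel_size=8, stride=4).to(device)
    out = net(obs)  # shape: (1, 32, 20, 20)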