From 6cac12bff4ee622caf536db30c9a8722d9d45a8f Mon Sep 17 00:00:00 2001
From: Manuel Santana Castolo
Date: Wed, 1 Oct 2025 21:36:46 +0000
Subject: [PATCH 1/2] Add Accelerator API to Mario RL tutorial

- Integrate the Accelerator API to support multiple accelerators.

Co-authored-by: dggaytan
---
 intermediate_source/mario_rl_tutorial.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/intermediate_source/mario_rl_tutorial.py b/intermediate_source/mario_rl_tutorial.py
index 03d6396a47e..72f0b8f1294 100755
--- a/intermediate_source/mario_rl_tutorial.py
+++ b/intermediate_source/mario_rl_tutorial.py
@@ -34,6 +34,9 @@
 # pip install gym-super-mario-bros==7.4.0
 # pip install tensordict==0.3.0
 # pip install torchrl==0.3.0
+# pip install gymnasium
+# pip install nes-py
+# pip install gym-super-mario-bros
 #
 
 import torch
@@ -101,9 +104,9 @@
 
 # Initialize Super Mario environment (in v0.26 change render mode to 'human' to see results on the screen)
 if gym.__version__ < '0.26':
-    env = gym_super_mario_bros.make("SuperMarioBros-1-1-v0", new_step_api=True)
+    env = gym_super_mario_bros.make("SuperMarioBros-1-1-v3", new_step_api=True)
 else:
-    env = gym_super_mario_bros.make("SuperMarioBros-1-1-v0", render_mode='rgb', apply_api_compatibility=True)
+    env = gym_super_mario_bros.make("SuperMarioBros-1-1-v3", render_mode='rgb', apply_api_compatibility=True)
 
 # Limit the action-space to
 #   0. walk right
@@ -292,7 +295,7 @@ def __init__(self, state_dim, action_dim, save_dir):
         self.action_dim = action_dim
         self.save_dir = save_dir
 
-        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        self.device = "xpu" if torch.xpu.is_available() else "cpu"
 
         # Mario's DNN to predict the most optimal action - we implement this in the Learn section
         self.net = MarioNet(self.state_dim, self.action_dim).float()
@@ -735,8 +738,8 @@ def record(self, episode, epsilon, step):
 # In this example we run the training loop for 40 episodes, but for Mario to truly learn the ways of
 # his world, we suggest running the loop for at least 40,000 episodes!
 #
-use_cuda = torch.cuda.is_available()
-print(f"Using CUDA: {use_cuda}")
+use_accel = torch.xpu.is_available()
+print(f"Using xpu: {use_accel}")
 print()
 
 save_dir = Path("checkpoints") / datetime.datetime.now().strftime("%Y-%m-%dT%H-%M-%S")

From 9bde31b9004649ef65577472608ef19612d4d464 Mon Sep 17 00:00:00 2001
From: Edgar Romo Montiel
Date: Tue, 7 Oct 2025 16:49:15 -0700
Subject: [PATCH 2/2] Add patches for NumPy 2.x compatibility and fix Mario x_position overflow

Update accelerator usage for device selection.

Signed-off-by: Edgar Romo Montiel
---
 intermediate_source/mario_rl_tutorial.py | 50 +++++++++++++++++++----
 1 file changed, 40 insertions(+), 10 deletions(-)

diff --git a/intermediate_source/mario_rl_tutorial.py b/intermediate_source/mario_rl_tutorial.py
index 72f0b8f1294..aaed8ee6908 100755
--- a/intermediate_source/mario_rl_tutorial.py
+++ b/intermediate_source/mario_rl_tutorial.py
@@ -32,13 +32,24 @@
 #
 # %%bash
 # pip install gym-super-mario-bros==7.4.0
-# pip install tensordict==0.3.0
-# pip install torchrl==0.3.0
-# pip install gymnasium
-# pip install nes-py
-# pip install gym-super-mario-bros
+# pip install 'tensordict>=0.3.0'
+# pip install 'torchrl>=0.3.0'
+# pip install matplotlib
 #
 
+######### Patch for NumPy 2.x: cast nes-py ROM sizes to Python int to avoid overflow
+import nes_py._rom
+
+def patched_prg_rom_stop(self):
+    return self.prg_rom_start + int(self.prg_rom_size) * 2**10
+
+def patched_chr_rom_stop(self):
+    return self.chr_rom_start + int(self.chr_rom_size) * 2**10
+
+nes_py._rom.ROM.prg_rom_stop = property(patched_prg_rom_stop)
+nes_py._rom.ROM.chr_rom_stop = property(patched_chr_rom_stop)
+########################################################################
+
 import torch
 from torch import nn
 from torchvision import transforms as T
@@ -87,7 +98,6 @@
 # the action in a state. We try to approximate this function.
 #
 
-
 ######################################################################
 # Environment
 # """"""""""""""""
@@ -102,6 +112,21 @@
 # (next) state, reward and other info.
 #
 
+
+################### Patch for NumPy 2.x: add np.bool8 alias if missing
+if not hasattr(np, "bool8"):
+    np.bool8 = np.bool_
+
+
+################### Patch the _x_position property to cast RAM values to int
+def patched_x_position(self):
+    # Cast to int to avoid numpy uint8 overflow
+    return int(self.ram[0x6d]) * 0x100 + int(self.ram[0x86])
+
+gym_super_mario_bros.smb_env.SuperMarioBrosEnv._x_position = property(patched_x_position)
+
+#######################################################################################
+
 # Initialize Super Mario environment (in v0.26 change render mode to 'human' to see results on the screen)
 if gym.__version__ < '0.26':
     env = gym_super_mario_bros.make("SuperMarioBros-1-1-v3", new_step_api=True)
@@ -295,7 +320,11 @@ def __init__(self, state_dim, action_dim, save_dir):
         self.action_dim = action_dim
         self.save_dir = save_dir
 
-        self.device = "xpu" if torch.xpu.is_available() else "cpu"
+        # Use the available accelerator (e.g. cuda, xpu, mps); fall back to CPU
+        accelerator = torch.accelerator.current_accelerator()
+        if accelerator is None:
+            accelerator = "cpu"
+        self.device = accelerator
 
         # Mario's DNN to predict the most optimal action - we implement this in the Learn section
         self.net = MarioNet(self.state_dim, self.action_dim).float()
@@ -738,9 +767,10 @@ def record(self, episode, epsilon, step):
 # In this example we run the training loop for 40 episodes, but for Mario to truly learn the ways of
 # his world, we suggest running the loop for at least 40,000 episodes!
 #
-use_accel = torch.xpu.is_available()
-print(f"Using xpu: {use_accel}")
-print()
+device = torch.accelerator.current_accelerator()
+if device is None:
+    device = "cpu"
+print(f"Using device: {device}\n")
 
 save_dir = Path("checkpoints") / datetime.datetime.now().strftime("%Y-%m-%dT%H-%M-%S")
 save_dir.mkdir(parents=True)
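
Reviewer note (illustration, not part of the patch series): a minimal
standalone sketch of the NumPy 2.x failure mode that the nes-py ROM-size
and _x_position patches above work around. The byte values are invented
for the example.

    import numpy as np

    # nes-py and gym-super-mario-bros read emulator RAM/header bytes as
    # numpy uint8 scalars.
    hi, lo = np.uint8(0x01), np.uint8(0x90)

    # Under NumPy 2.x (NEP 50), combining a uint8 with a Python int that
    # does not fit in uint8 raises OverflowError instead of promoting:
    try:
        x = hi * 0x100 + lo
    except OverflowError as err:
        print(f"NumPy 2.x: {err}")

    # Casting to Python int first, as the patches do, keeps the math exact:
    x = int(hi) * 0x100 + int(lo)
    print(x)  # 400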
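
Reviewer note (illustration, not part of the patch series): the
device-selection pattern both accelerator hunks adopt, as a self-contained
sketch. Assumes PyTorch >= 2.6, where the torch.accelerator module is
available; the tensor and layer below are only illustrative.

    import torch

    # current_accelerator() returns the accelerator the build supports
    # (cuda, xpu, mps, ...) as a torch.device, or None on CPU-only builds.
    device = torch.accelerator.current_accelerator()
    if device is None:
        device = torch.device("cpu")
    print(f"Using device: {device}")

    # Tensors and modules are then placed on it explicitly; the shapes
    # mirror the tutorial's 4-frame stack of 84x84 observations.
    obs = torch.randn(1, 4, 84, 84, device=device)
    net = torch.nn.Conv2d(4, 32, kernel_size=8, stride=4).to(device)
    out = net(obs)  # shape: (1, 32, 20, 20)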