GPUOpen-LibrariesAndSDKs
diff --git a/‎Docs/Sphinx/examples/example_outlines/pong.rst‎
Lines changed: 1 addition & 1 deletion b/‎Docs/Sphinx/examples/example_outlines/pong.rst‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎Proto/GymConnector.proto‎
Lines changed: 10 additions & 1 deletion b/‎Proto/GymConnector.proto‎
Lines changed: 10 additions & 1 deletion
diff --git a/‎Resources/python/schola/core/env.py‎
Lines changed: 24 additions & 1 deletion b/‎Resources/python/schola/core/env.py‎
Lines changed: 24 additions & 1 deletion
diff --git a/‎Resources/python/schola/core/spaces/box.py‎
Lines changed: 1 addition & 1 deletion b/‎Resources/python/schola/core/spaces/box.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎Resources/python/schola/generated/GymConnector_pb2.py‎
Lines changed: 16 additions & 14 deletions b/‎Resources/python/schola/generated/GymConnector_pb2.py‎
Lines changed: 16 additions & 14 deletions
diff --git a/‎Resources/python/schola/generated/GymConnector_pb2.pyi‎
Lines changed: 10 additions & 2 deletions b/‎Resources/python/schola/generated/GymConnector_pb2.pyi‎
Lines changed: 10 additions & 2 deletions
diff --git a/‎Resources/python/schola/gym/env.py‎
Lines changed: 45 additions & 2 deletions b/‎Resources/python/schola/gym/env.py‎
Lines changed: 45 additions & 2 deletions
diff --git a/‎Resources/python/schola/gym/utils.py‎
Lines changed: 24 additions & 0 deletions b/‎Resources/python/schola/gym/utils.py‎
Lines changed: 24 additions & 0 deletions
diff --git a/‎Resources/python/schola/ray/env.py‎
Lines changed: 3 additions & 2 deletions b/‎Resources/python/schola/ray/env.py‎
Lines changed: 3 additions & 2 deletions
@@ -6,7 +6,7 @@ The Pong environment features two agents playing a collaborative game of pong. T
 .. csv-table::
 
     "Num Agents", "2"
-    "Observation Space", "DictSpace({'Camera_SCS_SceneColorHDR_RTF_RGBA8_R_W16_H16': make_camera_space(64,64,num_channels=1)})"
+    "Observation Space", "DictSpace({'Camera_SCS_SceneColorHDR_RTF_RGBA8_R_W16_H16': make_camera_space(16,16,num_channels=1)})"
     "Action Space", "DictSpace({'Teleport_Y_50,00': DiscreteSpace(3)})"
     "Num Vectorized Copies", "2"
 
 
@@ -37,7 +37,16 @@ message TrainingStateUpdate {
 
 message TrainingDefinitionRequest {}
 
-message GymConnectorStartRequest {}
+enum AutoResetType {
+    SAMESTEP = 0; //Default value is SAMESTEP for backwards compatibility
+    NEXTSTEP = 1;
+    DISABLED = 2;
+}
+
+
+message GymConnectorStartRequest {
+    AutoResetType autoreset_type = 1;
+}
 
 //this could potentially get an observation at some point
 message GymConnectorStartResponse {}
 
@@ -17,6 +17,20 @@
 
 
 T = TypeVar("T")
+import sys
+
+if sys.version_info >= (3, 11):
+    from enum import StrEnum
+else:
+    from backports.strenum import StrEnum
+
+class AutoResetType(StrEnum):
+    """
+    Enum for Auto Reset Types.
+    """
+    DISABLED = "Disabled"
+    SAME_STEP = "SameStep"
+    NEXT_STEP = "NextStep"
 
 # A Dictionary, with EnvIds as keys and a Dictionary of AgentIds to some TypeVar as Value.
 EnvAgentIdDict = Dict[int,Dict[int,T]]
@@ -33,6 +47,8 @@ class ScholaEnv:
         The verbosity level for the environment.
     environment_start_timeout : int, default=45
         The time to wait for the environment to start in seconds.
+    auto_reset_type : AutoResetType, default=AutoResetType.SAME_STEP
+        The type of auto-reset for the environment. See Gymnasium for more details on the different modes. Only Disabled and SameStep are currently supported.
     
     Attributes
     ----------
@@ -66,6 +82,7 @@ def __init__(
         unreal_connection : UnrealConnection,
         verbosity:int=0,
         environment_start_timeout:int = 45,
+        auto_reset_type : AutoResetType = AutoResetType.SAME_STEP,
     ):
 
         log_level = logging.WARNING
@@ -83,8 +100,14 @@ def __init__(
         atexit.register(self.close)
         self.gym_stub : gym_grpc.GymServiceStub = self.unreal_connection.connect_stubs(gym_grpc.GymServiceStub)[0]
 
-        #Server might be booting up if we have a standalone connection, so we wait for 15 to verify
+        #Server might be booting up if we have a standalone connection, so we wait up to environment_start_timeout seconds (default 45) for it to be ready
         start_msg = gym_communication.GymConnectorStartRequest()
+        if(auto_reset_type == AutoResetType.DISABLED):
+            start_msg.autoreset_type = gym_communication.DISABLED
+        elif(auto_reset_type == AutoResetType.SAME_STEP):
+            start_msg.autoreset_type = gym_communication.SAMESTEP
+        elif(auto_reset_type == AutoResetType.NEXT_STEP):
+            start_msg.autoreset_type = gym_communication.NEXTSTEP
         self.gym_stub.StartGymConnector(start_msg, timeout=environment_start_timeout, wait_for_ready=True)
 
         logging.info("requesting environment definition")
 
@@ -140,5 +140,5 @@ def __len__(self) -> int:
         return self.low.size
 
     def process_data(self, msg : proto_points.FundamentalPoint) -> np.ndarray:
-        output = np.asarray(msg.box_point.values).reshape(self.shape)
+        output = np.asarray(msg.box_point.values,dtype=np.float32).reshape(self.shape)
         return output
@@ -9,8 +9,11 @@ from typing import ClassVar as _ClassVar, Mapping as _Mapping, Optional as _Opti
 
 CLOSED: CommunicatorStatus
 DESCRIPTOR: _descriptor.FileDescriptor
+DISABLED: AutoResetType
 ERROR: CommunicatorStatus
 GOOD: CommunicatorStatus
+NEXTSTEP: AutoResetType
+SAMESTEP: AutoResetType
 
 class EnvironmentReset(_message.Message):
     __slots__ = ["options", "seed"]
@@ -36,8 +39,10 @@ class EnvironmentStateUpdate(_message.Message):
     def __init__(self, reset: _Optional[_Union[EnvironmentReset, _Mapping]] = ..., step: _Optional[_Union[_StateUpdates_pb2.EnvironmentStep, _Mapping]] = ...) -> None: ...
 
 class GymConnectorStartRequest(_message.Message):
-    __slots__ = []
-    def __init__(self) -> None: ...
+    __slots__ = ["autoreset_type"]
+    AUTORESET_TYPE_FIELD_NUMBER: _ClassVar[int]
+    autoreset_type: AutoResetType
+    def __init__(self, autoreset_type: _Optional[_Union[AutoResetType, str]] = ...) -> None: ...
 
 class GymConnectorStartResponse(_message.Message):
     __slots__ = []
@@ -81,3 +86,6 @@ class TrainingStateUpdate(_message.Message):
 
 class CommunicatorStatus(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
     __slots__ = []
+
+class AutoResetType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
+    __slots__ = []
@@ -4,9 +4,9 @@
 Implementation of gym.vector.VectorEnv backed by a Schola Environment.
 """
 
-from typing import Dict, List, Tuple, TypeVar, Union
+from typing import Dict, List, Optional, Tuple, TypeVar, Union
 from schola.core.unreal_connections import UnrealConnection
-from schola.core.env import ScholaEnv
+from schola.core.env import AutoResetType, ScholaEnv
 from schola.core.error_manager import EnvironmentException
 import numpy as np
 import gymnasium as gym
@@ -16,6 +16,47 @@
 
 T = TypeVar("T")
 
+class GymEnv(gym.Env):
+
+    def __init__(self, 
+                 unreal_connection: UnrealConnection,
+                 verbosity: int = 0):
+        
+        self._env = ScholaEnv(
+            unreal_connection,
+            verbosity= verbosity,
+            auto_reset_type=AutoResetType.DISABLED
+        )
+        self.id_manager = IdManager(self._env.ids)
+        
+        self.observation_space = self._env.get_obs_space(env_id=0, agent_id=0)
+        self.action_space = self._env.get_action_space(env_id=0, agent_id=0)
+        try:
+            assert self.id_manager.num_ids == 1, "GymEnv is designed for single-agent non-vectorized environments only. Please use GymVectorEnv for multi-agent or vectorized environments."
+        except Exception as e:
+            self._env.close()
+            raise e
+    
+    def close(self) -> None:
+        """
+        Close the environment and release resources.
+        """
+        super().close()
+        # Close the environment connection
+        return self._env.close()
+
+    def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, str]] = None) -> Tuple[Dict[str, np.ndarray], Dict[int, Dict[str, str]]]:
+        super().reset(seed=seed, options=options)
+        obs, nested_infos = self._env.hard_reset(env_ids=[0],seeds=seed,options=options)
+        return obs[0][0], nested_infos[0][0]
+    
+    def step(self, action: Dict[str, np.ndarray]) -> Tuple[Dict[str, np.ndarray], float, bool, bool, Dict[str, str]]:
+        self._env.send_actions({0: {0:action}})  # Send action for the first (and only) environment
+        observations, rewards, terminateds, truncateds, nested_infos = self._env.poll()
+        observations, rewards, terminated, truncated,infos = observations[0][0], rewards[0][0], terminateds[0][0], truncateds[0][0], nested_infos[0][0]
+        return observations, rewards, terminated, truncated, infos
+
+
 class GymVectorEnv(gym.vector.VectorEnv):
     """
     A Gym Vector Environment that wraps a Schola Environment.
@@ -40,7 +81,9 @@ def __init__(
         self._env = ScholaEnv(
             unreal_connection,
             verbosity,
+            auto_reset_type=AutoResetType.SAME_STEP,
         )
+        
         self.id_manager = IdManager(self._env.ids)
         # we just use the default UID to get the shared definition
         single_obs_space = self._env.get_obs_space(*self.id_manager[0])
 
@@ -0,0 +1,24 @@
+# Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
+
+from gymnasium.spaces import Dict
+from gymnasium import Env, Wrapper
+
+
+class PopActionWrapper(Wrapper):
+    """
+    A wrapper that exposes only the first entry of the environment's dictionary action space as its action space.
+    This is useful for environments where the action space is a dictionary
+    and we want to use only one of the actions.
+    """
+    
+    def __init__(self, env: Env):
+        super().__init__(env)
+        assert isinstance(env.action_space, Dict), "Action space must be a Dictionary Space."
+        # Take the first (key, space) entry of the Dict action space; step() re-wraps actions under this key
+        self.key, self.action_space = list(env.action_space.spaces.items())[0]
+    
+    def step(self, action):
+        """
+        Step the environment with the given action.
+        """
+        return self.env.step({self.key: action})
@@ -8,7 +8,7 @@
 import logging
 
 from schola.core.unreal_connections import UnrealConnection
-from schola.core.env import ScholaEnv, EnvAgentIdDict
+from schola.core.env import AutoResetType, ScholaEnv, EnvAgentIdDict
 from schola.core.spaces import (
     DictSpace,
 )
@@ -72,7 +72,7 @@ def __init__(
     ):
         self.first_poll = True
 
-        self._env = ScholaEnv(unreal_connection, verbosity)
+        self._env = ScholaEnv(unreal_connection, verbosity, auto_reset_type=AutoResetType.SAME_STEP)
         self.last_reset_obs = {}
         self.last_reset_infos = {}
 
@@ -225,3 +225,4 @@ def try_reset(
 
     def stop(self) -> None:
         self._env.close()
+