Fix bug in omiga mujoco wrapper.

jcformanek · jcformanek · commit c6e6974cf6d9 · 2025-04-23T12:00:24.000+02:00
diff --git a/og_marl/environments.py b/og_marl/environments.py
@@ -21,6 +21,10 @@ def get_environment(source: str, env_name: str, scenario: str, seed: int = 42) -
     elif env_name == "mamujoco" and source == "og_marl":
         from og_marl.wrapped_environments.mamujoco import MAMuJoCo
 
+        return MAMuJoCo(scenario, seed=seed)
+    elif env_name == "mamujoco" and source == "omar":
+        from og_marl.wrapped_environments.mamujoco_omar import MAMuJoCo
+
         return MAMuJoCo(scenario, seed=seed)
     elif env_name == "gymnasium_mamujoco":
         from og_marl.wrapped_environments.gymnasium_mamujoco import WrappedGymnasiumMAMuJoCo
diff --git a/og_marl/tf2_systems/offline/configs/continuous_bc.yaml b/og_marl/tf2_systems/offline/configs/continuous_bc.yaml
@@ -4,10 +4,10 @@ wandb_project: og-marl
 training_steps: 5e5
 
 task:
-  source: og_marl
+  source: omiga
   env: mamujoco
-  scenario: 2halfcheetah
-  dataset: Good
+  scenario: 3hopper
+  dataset: Expert
 
 replay:
   sequence_length: 20
diff --git a/og_marl/tf2_systems/offline/configs/iddpg_bc.yaml b/og_marl/tf2_systems/offline/configs/iddpg_bc.yaml
@@ -4,10 +4,10 @@ wandb_project: og-marl
 training_steps: 5e5
 
 task:
-  source: my_datasets
-  env: gymnasium_mamujoco
-  scenario: 2reacher
-  dataset: replay
+  source: omiga
+  env: mamujoco
+  scenario: 3hopper
+  dataset: Expert
 
 replay:
   sequence_length: 20
diff --git a/og_marl/wrapped_environments/mamujoco.py b/og_marl/wrapped_environments/mamujoco.py
@@ -23,7 +23,7 @@ def get_mamujoco_args(scenario: str) -> Dict[str, Any]:
     env_args = {
         "agent_obsk": 1,
         "episode_limit": 1000,
-        "global_categories": "qvel,qpos",
+        # "global_categories": "qvel,qpos",
     }
     if scenario.lower() == "4ant":
         env_args["scenario"] = "Ant-v2"
diff --git a/og_marl/wrapped_environments/mamujoco_omar.py b/og_marl/wrapped_environments/mamujoco_omar.py
@@ -0,0 +1,112 @@
+from typing import Any, Dict, Optional
+
+import numpy as np
+
+from og_marl.custom_environments.multiagent_mujoco.mujoco_multi import MujocoMulti
+
+from og_marl.wrapped_environments.base import BaseEnvironment, ResetReturn, StepReturn
+
+
+class MAMuJoCo(BaseEnvironment):
+    """Environment wrapper for Multi-Agent MuJoCo (``omar`` dataset source).
+
+    Wraps ``MujocoMulti`` and exposes observations, rewards, terminals and
+    truncations as dictionaries keyed by agent name (``agent_0``, ``agent_1``, ...).
+    """
+
+    def __init__(self, scenario: str, seed: Optional[int] = None):
+        """Build the underlying ``MujocoMulti`` environment for ``scenario``.
+
+        Args:
+            scenario: Scenario name, matched case-insensitively. Only
+                ``"2halfcheetah"`` is currently supported.
+            seed: Accepted so the constructor matches how ``get_environment``
+                calls it; it is not forwarded to the underlying environment.
+                NOTE(review): confirm whether ``MujocoMulti`` can be seeded.
+
+        Raises:
+            ValueError: If ``scenario`` is not a supported scenario.
+        """
+        env_args = self._get_mamujoco_args(scenario)
+
+        self._environment = MujocoMulti(env_args=env_args)
+
+        self.possible_agents = [f"agent_{n}" for n in range(self._environment.n_agents)]
+        self._num_actions = self._environment.n_actions
+
+        # Keep the wrapper's limit in step with the one given to the environment.
+        self.max_episode_length = env_args["episode_limit"]
+
+    def _get_mamujoco_args(self, scenario: str) -> Dict[str, Any]:
+        """Return the ``MujocoMulti`` ``env_args`` for ``scenario``.
+
+        Raises:
+            ValueError: If ``scenario`` is not ``"2halfcheetah"`` (case-insensitive).
+        """
+        env_args: Dict[str, Any] = {
+            "agent_obsk": 0,
+            "episode_limit": 1000,
+        }
+        if scenario.lower() == "2halfcheetah":
+            env_args["scenario"] = "HalfCheetah-v2"
+            env_args["agent_conf"] = "2x3"
+        else:
+            raise ValueError("Not a valid omar mamujoco scenario.")
+        return env_args
+
+    def _agent_observations(self) -> Dict[str, np.ndarray]:
+        """Return the current observations as float32 arrays keyed by agent name."""
+        observations = self._environment.get_obs()
+        return {
+            agent: observations[i].astype("float32")
+            for i, agent in enumerate(self.possible_agents)
+        }
+
+    def reset(self) -> ResetReturn:
+        """Reset the environment and return ``(observations, info)``.
+
+        ``info["state"]`` holds the value of the environment's ``get_state()``.
+        """
+        self._environment.reset()
+
+        observations = self._agent_observations()
+        info = {"state": self._environment.get_state()}
+
+        return observations, info
+
+    def step(self, actions: Dict[str, np.ndarray]) -> StepReturn:
+        """Apply one joint action and return the resulting transition.
+
+        Args:
+            actions: Action for each agent, keyed by agent name.
+
+        Returns:
+            ``(observations, rewards, terminals, truncations, info)``. The single
+            reward and done flag are copied to every agent, truncations are
+            always ``False`` and ``info`` only contains the ``"state"`` entry.
+        """
+        # Pass the actions as a list ordered like `possible_agents`.
+        mujoco_actions = [actions[agent] for agent in self.possible_agents]
+
+        # The environment's own info dict is dropped; only the state is reported.
+        reward, done, _ = self._environment.step(mujoco_actions)
+
+        terminals = {agent: done for agent in self.possible_agents}
+        truncations = {agent: False for agent in self.possible_agents}
+        rewards = {agent: reward for agent in self.possible_agents}
+
+        observations = self._agent_observations()
+        info = {"state": self._environment.get_state()}
+
+        return observations, rewards, terminals, truncations, info  # type: ignore
+
+    def __getattr__(self, name: str) -> Any:
+        """Expose any other attributes of the underlying environment."""
+        # Only reached when normal lookup fails. If `_environment` itself is
+        # missing (e.g. before `__init__` has run or while unpickling), delegating
+        # would look it up through this method again and recurse without end.
+        if name == "_environment":
+            raise AttributeError(name)
+        if hasattr(self.__class__, name):
+            return self.__getattribute__(name)
+        return getattr(self._environment, name)
diff --git a/og_marl/wrapped_environments/mamujoco_omiga.py b/og_marl/wrapped_environments/mamujoco_omiga.py
@@ -93,7 +93,7 @@ def add_agent_id_and_normalise(self, observations):
             one_hot[i] = 1
             agent_obs = observations[agent].astype("float32")
             agent_obs = np.concatenate([agent_obs, one_hot], axis=-1)
-            agent_obs = agent_obs - np.mean(agent_obs) / np.std(agent_obs)
+            agent_obs = (agent_obs - np.mean(agent_obs)) / np.std(agent_obs)
             observations[agent] = agent_obs
         return observations
 

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -23,7 +23,7 @@ def get_mamujoco_args(scenario: str) -> Dict[str, Any]:` |
| `23` | `23` | `env_args = {` |
| `24` | `24` | `"agent_obsk": 1,` |
| `25` | `25` | `"episode_limit": 1000,` |
| `26` | | `- "global_categories": "qvel,qpos",` |
| | `26` | `+ # "global_categories": "qvel,qpos",` |
| `27` | `27` | `}` |
| `28` | `28` | `if scenario.lower() == "4ant":` |
| `29` | `29` | `env_args["scenario"] = "Ant-v2"` |
`29`	`29`	`env_args["scenario"] = "Ant-v2"`