
Commit 39f5428

Added AlberDice RWARE.
1 parent e6df2dd commit 39f5428

File tree

7 files changed, +497 −0 lines changed

Lines changed: 68 additions & 0 deletions
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function


class MultiAgentEnv(object):

    def step(self, actions):
        """Returns reward, terminated, info."""
        raise NotImplementedError

    def get_obs(self):
        """Returns all agent observations in a list."""
        raise NotImplementedError

    def get_obs_agent(self, agent_id):
        """Returns observation for agent_id."""
        raise NotImplementedError

    def get_obs_size(self):
        """Returns the size of the observation."""
        raise NotImplementedError

    def get_state(self):
        """Returns the global state."""
        raise NotImplementedError

    def get_state_size(self):
        """Returns the size of the global state."""
        raise NotImplementedError

    def get_avail_actions(self):
        """Returns the available actions of all agents in a list."""
        raise NotImplementedError

    def get_avail_agent_actions(self, agent_id):
        """Returns the available actions for agent_id."""
        raise NotImplementedError

    def get_total_actions(self):
        """Returns the total number of actions an agent could ever take."""
        raise NotImplementedError

    def reset(self):
        """Returns initial observations and states."""
        raise NotImplementedError

    def render(self):
        """Renders the environment (optional)."""
        raise NotImplementedError

    def close(self):
        """Releases any resources held by the environment."""
        raise NotImplementedError

    def seed(self):
        """Seeds the environment's random number generator."""
        raise NotImplementedError

    def save_replay(self):
        """Save a replay."""
        raise NotImplementedError

    def get_env_info(self):
        """Returns a dict summarising the environment's dimensions."""
        env_info = {"state_shape": self.get_state_size(),
                    "obs_shape": self.get_obs_size(),
                    "n_actions": self.get_total_actions(),
                    "n_agents": self.n_agents,
                    "episode_limit": self.episode_limit,
                    "unit_dim": self.unit_dim}
        return env_info
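
To make the interface concrete, here is a minimal sketch of a subclass; DummyEnv and everything inside it are illustrative and do not appear in this commit. It fills in just enough state (n_agents, episode_limit, unit_dim) for the inherited get_env_info to work.

import numpy as np

# Hypothetical trivial environment, shown only to illustrate the contract.
class DummyEnv(MultiAgentEnv):

    def __init__(self, n_agents=2, episode_limit=50):
        self.n_agents = n_agents
        self.episode_limit = episode_limit
        self.unit_dim = 4                  # per-agent feature size (arbitrary)
        self._t = 0

    def reset(self):
        self._t = 0
        return self.get_obs(), self.get_state()

    def step(self, actions):
        self._t += 1
        return 0.0, self._t >= self.episode_limit, {}

    def get_obs(self):
        return [self.get_obs_agent(i) for i in range(self.n_agents)]

    def get_obs_agent(self, agent_id):
        return np.zeros(self.get_obs_size(), dtype=np.float32)

    def get_obs_size(self):
        return self.unit_dim

    def get_state(self):
        return np.concatenate(self.get_obs())

    def get_state_size(self):
        return self.get_obs_size() * self.n_agents

    def get_avail_actions(self):
        return [self.get_avail_agent_actions(i) for i in range(self.n_agents)]

    def get_avail_agent_actions(self, agent_id):
        return [1] * self.get_total_actions()

    def get_total_actions(self):
        return 5                           # NOOP, FORWARD, LEFT, RIGHT, TOGGLE_LOAD

env = DummyEnv()
print(env.get_env_info())                  # uses the base-class implementation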
Lines changed: 70 additions & 0 deletions
import numpy as np
from dataclasses import dataclass

DIRECTION_UP = 0
DIRECTION_DOWN = 1
DIRECTION_LEFT = 2
DIRECTION_RIGHT = 3


@dataclass
class NearInformation:
    x: int
    y: int
    is_agent: bool
    agent_direction: int
    is_shelf: bool
    is_requested_shelf: bool


@dataclass
class Observation:
    x: int
    y: int
    is_carrying: bool
    direction: int
    is_path_location: bool
    near_info: list


class ObservationParser:

    @staticmethod
    def chunks(lst, n):
        """Yield successive n-sized chunks from lst."""
        for i in range(0, len(lst), n):
            yield lst[i:i + n]

    @staticmethod
    def parse(obs):
        """Parses a flat RWARE observation vector into an Observation."""
        parsed_obs = Observation(
            x=obs[0],
            y=obs[1],
            is_carrying=obs[2] == 1.0,
            direction=int(np.argmax(obs[3:7])),      # 4-value direction one-hot
            is_path_location=obs[7] == 1.0,
            near_info=ObservationParser.parse_near_info(obs)
        )
        return parsed_obs

    @staticmethod
    def parse_near_info(obs):
        """Parses the 3x3 neighbourhood, encoded as nine 7-value chunks."""
        agent_x = obs[0]
        agent_y = obs[1]

        near_info = []
        infos = list(ObservationParser.chunks(obs[8:], 7))

        for i, info in enumerate(infos):
            row = i // 3
            col = i % 3
            near_info.append(NearInformation(
                x=agent_x - 1 + row,
                y=agent_y - 1 + col,
                is_agent=info[0] == 1.0,
                agent_direction=int(np.argmax(info[1:5])),
                is_shelf=info[5] == 1.0,
                is_requested_shelf=info[6] == 1.0
            ))

        return near_info
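
A quick usage sketch, assuming the flat layout the parser implies: an 8-value header (x, y, carrying flag, 4-value direction one-hot, path flag) followed by nine 7-value chunks for the 3x3 neighbourhood, with chunk 4 as the agent's own cell. The synthetic vector below is illustrative, not taken from the environment.

import numpy as np

# 8 header values plus nine 7-value chunks: 71 floats in total.
obs = np.zeros(8 + 9 * 7, dtype=np.float32)
obs[0], obs[1] = 4, 7            # agent position
obs[2] = 1.0                     # carrying a shelf
obs[3 + DIRECTION_RIGHT] = 1.0   # facing right
center = 8 + 4 * 7               # chunk 4 is the agent's own cell
obs[center + 5] = 1.0            # a shelf is here...
obs[center + 6] = 1.0            # ...and it is requested

parsed = ObservationParser.parse(obs)
assert parsed.is_carrying and parsed.direction == DIRECTION_RIGHT
assert parsed.near_info[4].is_requested_shelf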
Lines changed: 61 additions & 0 deletions
import math
from statistics import mean
from typing import Optional

from .observation_parser import NearInformation, Observation, ObservationParser


class RewardCalculator:

    @staticmethod
    def position_reward(env, x, y):
        """Small bonus for being close to the goals, normalised by the mean
        goal distance from the origin."""
        max_dist = []
        dist = []

        for goal in env.goals:
            goal_x = goal[0]
            goal_y = goal[1]
            dist.append(math.hypot(goal_x - x, goal_y - y))
            max_dist.append(math.hypot(goal_x, goal_y))  # distance from (0, 0)
        return 0.0005 * (mean(max_dist) - mean(dist)) / mean(max_dist)

    @staticmethod
    def is_center_shelf(obs: Observation, is_requested: bool) -> bool:
        """Checks whether the agent's own cell holds a shelf whose requested
        flag matches is_requested."""
        center_info: NearInformation = obs.near_info[4]
        return center_info.is_shelf and center_info.is_requested_shelf == is_requested

    @staticmethod
    def find_requested_shelf(obs: Observation) -> Optional[NearInformation]:
        """Returns the first requested shelf in the neighbourhood, if any."""
        near_info = obs.near_info
        info: NearInformation
        for info in near_info:
            if info.is_shelf and info.is_requested_shelf:
                return info
        return None

    @staticmethod
    def calculate(env, reward, prev_obs, obs):
        """Adds shaping terms to the environment reward.
        prev_obs is currently unused."""
        parsed: Observation = ObservationParser.parse(obs)

        # standing on a requested shelf
        if RewardCalculator.is_center_shelf(parsed, True):
            if parsed.is_carrying:
                reward += 0.006
            else:
                reward += 0.003

        # reward += RewardCalculator.position_reward(env, parsed.x, parsed.y)
        #
        # # standing on a non-requested shelf
        # if RewardCalculator.is_center_shelf(parsed, False):
        #     if parsed.is_carrying:
        #         reward -= 0.003
        #     else:
        #         reward -= 0.0015
        #
        # reward -= RewardCalculator.position_reward(env, parsed.x, parsed.y)

        # a requested shelf is visible somewhere in the neighbourhood
        if RewardCalculator.find_requested_shelf(parsed) is not None:
            reward += 0.001

        return reward
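
As a usage sketch, the calculator could be applied per agent after each environment step. This assumes a gym-style RWARE env whose step() returns lists of per-agent observations and rewards; shaped_step and its signature are hypothetical, not part of the commit.

def shaped_step(env, actions, prev_obs_list):
    # Step the underlying env, then shape each agent's reward from its
    # previous and current flat observation vectors.
    obs_list, rewards, dones, info = env.step(actions)
    shaped = [
        RewardCalculator.calculate(env, r, prev_obs, obs)
        for r, prev_obs, obs in zip(rewards, prev_obs_list, obs_list)
    ]
    return obs_list, shaped, dones, info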
Lines changed: 16 additions & 0 deletions
from enum import Enum


class Action(Enum):
    NOOP = 0
    FORWARD = 1
    LEFT = 2
    RIGHT = 3
    TOGGLE_LOAD = 4


class Direction(Enum):
    UP = 0
    DOWN = 1
    LEFT = 2
    RIGHT = 3
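
These values line up with the DIRECTION_* constants in the observation parser (UP=0 through RIGHT=3) and with the five discrete RWARE actions. A small illustrative snippet:

# Illustrative only; Action and Direction come from the enum file above,
# DIRECTION_RIGHT from the observation parser.
joint_action = [Action.FORWARD, Action.TOGGLE_LOAD]
indices = [a.value for a in joint_action]    # -> [1, 4], as an env expects

facing = Direction(DIRECTION_RIGHT)          # -> Direction.RIGHT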
