diff --git a/README.md b/README.md
index a93eb3a1..98a81329 100755
--- a/README.md
+++ b/README.md
@@ -134,8 +134,8 @@ env:
## Task configuration
The task configuration part (section `coordinator[agents]`) defines the starting and goal position of the attacker and the type of defender that is used.
-### Attacker configuration (`attackers`)
-Configuration of the attacking agents. Consists of two parts:
+### Attacker configuration (`Attacker`)
+Configuration of the attacking agents. Consists of three parts:
1. Goal definition (`goal`) which describes the `GameState` properties that must be fulfilled to award `goal_reward` to the attacker:
    - `known_networks:`(set)
    - `known_hosts`(set)
@@ -154,11 +154,14 @@ Configuration of the attacking agents. Consists of two parts:
    - `known_data`(dict)

    The initial network configuration must assign at least **one** controlled host to the attacker in the network. Any item in `controlled_hosts` is copied to `known_hosts`, so there is no need to include these in both sets. `known_networks` is also extended with a set of **all** networks accessible from the `controlled_hosts`
+3. Definition of the maximum allowed number of steps:
+    - `max_steps:`(int)

Example attacker configuration:
```YAML
agents:
  Attacker:
+    max_steps: 100
    goal:
      randomize_goal_every_episode: False
      known_networks: []
@@ -179,7 +182,7 @@ agents:
      known_data: {}
      known_blocks: {}
```
-### Defender configuration (`defenders`)
+### Defender configuration (`Defender`)
Currently, the defender **is** a separate agent.
If you want a defender in the game, you must connect a defender agent.
For playing without a defender, leave the section empty.
diff --git a/coordinator.py b/coordinator.py
index 5839be01..27ca1be8 100644
--- a/coordinator.py
+++ b/coordinator.py
@@ -188,7 +188,7 @@ def __init__(self, actions_queue, answers_queue, net_sec_config, allowed_roles,
        self._starting_positions_per_role = self._get_starting_position_per_role()
        self._win_conditions_per_role = self._get_win_condition_per_role()
        self._goal_description_per_role = self._get_goal_description_per_role()
-        self._steps_limit = self._world.task_config.get_max_steps()
+        self._steps_limit_per_role = self._get_max_steps_per_role()
        self._use_global_defender = self._world.task_config.get_use_global_defender()
        # player information
        self.agents = {}
@@ -201,18 +201,19 @@ def __init__(self, actions_queue, answers_queue, net_sec_config, allowed_roles,
        self._agent_starting_position = {}
        # current state per agent_addr (GameState)
        self._agent_states = {}
-        # goal reach status per agent_addr (bool)
-        self._agent_goal_reached = {}
-        self._agent_episode_ends = {}
-        self._agent_detected = {}
+        # agent status dict {agent_addr: string}
+        self._agent_statuses = {}
+        # agent end-reward dict {agent_addr: int}
+        self._agent_rewards = {}
        # trajectories per agent_addr
        self._agent_trajectories = {}

    @property
    def episode_end(self)->bool:
-        # Terminate episode if at least one player wins or reaches the timeout
-        self.logger.debug(f"End evaluation: {self._agent_episode_ends.values()}")
-        return all(self._agent_episode_ends.values())
+        # Episode ends ONLY IF all agents with defined max_steps reached the end of the episode
+        exists_active_player = any(status == "playing_active" for status in self._agent_statuses.values())
+        self.logger.debug(f"End evaluation: {self._agent_statuses.items()} - Episode end:{not exists_active_player}")
+        return not exists_active_player

    @property
    def config_file_hash(self):
@@ -273,8 +274,13 @@ async def run(self):
                        self._reset_requests[agent] = False
                        self._agent_steps[agent] = 0
                        self._agent_states[agent] = self._world.create_state_from_view(self._agent_starting_position[agent])
-                        self._agent_goal_reached[agent] = self._goal_reached(agent)
-                        self._agent_episode_ends[agent] = False
+                        self._agent_rewards.pop(agent, None)
+                        if self._steps_limit_per_role[self.agents[agent][1]]:
+                            # This agent can force the episode end (has timeout and goal defined)
+                            self._agent_statuses[agent] = "playing_active"
+                        else:
+                            # This agent can NOT force the episode end (does NOT have timeout or goal defined)
+                            self._agent_statuses[agent] = "playing"
                        output_message_dict = self._create_response_to_reset_game_action(agent)
                        msg_json = self.convert_msg_dict_to_json(output_message_dict)
                        # Send to anwer_queue
@@ -307,9 +313,13 @@ def _initialize_new_player(self, agent_addr:tuple, agent_name:str, agent_role:st
        self._reset_requests[agent_addr] = False
        self._agent_starting_position[agent_addr] = self._starting_positions_per_role[agent_role]
        self._agent_states[agent_addr] = self._world.create_state_from_view(self._agent_starting_position[agent_addr])
-        self._agent_goal_reached[agent_addr] = self._goal_reached(agent_addr)
-        self._agent_detected[agent_addr] = self._check_detection(agent_addr, None)
-        self._agent_episode_ends[agent_addr] = False
+
+        if self._steps_limit_per_role[agent_role]:
+            # This agent can force the episode end (has timeout and goal defined)
+            self._agent_statuses[agent_addr] = "playing_active"
+        else:
+            # This agent can NOT force the episode end (does NOT have timeout or goal defined)
+            self._agent_statuses[agent_addr] = "playing"
        if self._world.task_config.get_store_trajectories() or self._use_global_defender:
            self._agent_trajectories[agent_addr] = self._reset_trajectory(agent_addr)
        self.logger.info(f"\tAgent {agent_name} ({agent_addr}), registred as {agent_role}")
@@ -323,10 +333,10 @@ def _remove_player(self, agent_addr:tuple)->dict:
        agent_info = {}
        if agent_addr in self.agents:
            agent_info["state"] = self._agent_states.pop(agent_addr)
-            agent_info["goal_reached"] = self._agent_goal_reached.pop(agent_addr)
+            agent_info["status"] = self._agent_statuses.pop(agent_addr)
            agent_info["num_steps"] = self._agent_steps.pop(agent_addr)
            agent_info["reset_request"] = self._reset_requests.pop(agent_addr)
-            agent_info["episode_end"] = self._agent_episode_ends.pop(agent_addr)
+            agent_info["end_reward"] = self._agent_rewards.pop(agent_addr, None)
            agent_info["agent_info"] = self.agents.pop(agent_addr)
            self.logger.debug(f"\t{agent_info}")
        else:
@@ -376,6 +386,19 @@ def _get_goal_description_per_role(self)->dict:
            self.logger.info(f"Goal description for role '{agent_role}': {goal_descriptions[agent_role]}")
        return goal_descriptions

+    def _get_max_steps_per_role(self)->dict:
+        """
+        Method for finding the maximum number of steps per episode for each agent role in the game.
+ """ + max_steps = {} + for agent_role in self.ALLOWED_ROLES: + try: + max_steps[agent_role] = self._world.task_config.get_max_steps(agent_role) + except KeyError: + max_steps[agent_role] = None + self.logger.info(f"Max steps in episode for '{agent_role}': {max_steps[agent_role]}") + return max_steps + def _process_join_game_action(self, agent_addr: tuple, action: Action) -> dict: """ " Method for processing Action of type ActionType.JoinGame @@ -386,14 +409,13 @@ def _process_join_game_action(self, agent_addr: tuple, action: Action) -> dict: agent_role = action.parameters["agent_info"].role if agent_role in self.ALLOWED_ROLES: initial_observation = self._initialize_new_player(agent_addr, agent_name, agent_role) - max_steps = self._world._max_steps if agent_role == "Attacker" else None output_message_dict = { "to_agent": agent_addr, "status": str(GameStatus.CREATED), "observation": observation_as_dict(initial_observation), "message": { "message": f"Welcome {agent_name}, registred as {agent_role}", - "max_steps": max_steps, + "max_steps": self._steps_limit_per_role[agent_role], "goal_description": self._goal_description_per_role[agent_role], "num_actions": self._world.num_actions, "configuration_hash": self._CONFIG_FILE_HASH @@ -436,8 +458,9 @@ def _create_response_to_reset_game_action(self, agent_addr: tuple) -> dict: "observation": observation_as_dict(new_observation), "message": { "message": "Resetting Game and starting again.", - "max_steps": self._world._max_steps, - "goal_description": self._goal_description_per_role[self.agents[agent_addr][1]] + "max_steps": self._steps_limit_per_role[self.agents[agent_addr][1]], + "goal_description": self._goal_description_per_role[self.agents[agent_addr][1]], + "configuration_hash": self._CONFIG_FILE_HASH }, } return output_message_dict @@ -491,24 +514,34 @@ def _process_generic_action(self, agent_addr: tuple, action: Action) -> dict: current_state = self._agent_states[agent_addr] # Build new Observation for the agent self._agent_states[agent_addr] = self._world.step(current_state, action, agent_addr) - self._agent_goal_reached[agent_addr] = self._goal_reached(agent_addr) - - self._agent_detected[agent_addr] = self._check_detection(agent_addr, action) - + # check timout + if self._max_steps_reached(agent_addr): + self._agent_statuses[agent_addr] = "max_steps" + # check detection + if self._check_detection(agent_addr, action): + self._agent_statuses[agent_addr] = "blocked" + self._agent_detected[agent_addr] = True + # check goal + if self._goal_reached(agent_addr): + self._agent_statuses[agent_addr] = "goal_reached" + # add reward for taking a step reward = self._world._rewards["step"] + obs_info = {} end_reason = None - if self._agent_goal_reached[agent_addr]: - reward += self._world._rewards["goal"] - self._agent_episode_ends[agent_addr] = True + if self._agent_statuses[agent_addr] == "goal_reached": + self._assign_end_rewards() + reward += self._agent_rewards[agent_addr] end_reason = "goal_reached" obs_info = {'end_reason': "goal_reached"} - elif self._timeout_reached(agent_addr): - self._agent_episode_ends[agent_addr] = True + elif self._agent_statuses[agent_addr] == "max_steps": + self._assign_end_rewards() + reward += self._agent_rewards[agent_addr] obs_info = {"end_reason": "max_steps"} end_reason = "max_steps" - elif self._agent_detected[agent_addr]: - reward += self._world._rewards["detection"] + elif self._agent_statuses[agent_addr] == "blocked": + self._assign_end_rewards() + reward += self._agent_rewards[agent_addr] 
            self._agent_episode_ends[agent_addr] = True
            obs_info = {"end_reason": "max_steps"}
@@ -524,6 +557,7 @@
                "status": str(GameStatus.OK),
            }
        else:
+            self._assign_end_rewards()
            self.logger.error(f"{self.episode_end}, {self._agent_episode_ends}")
            output_message_dict = self._generate_episode_end_message(agent_addr)
        return output_message_dict
@@ -533,15 +567,8 @@ def _generate_episode_end_message(self, agent_addr:tuple)->dict:
        Method for generating response when agent attemps to make a step after episode ended.
        """
        current_observation = self._agent_observations[agent_addr]
-        reward = 0 # TODO
-        end_reason = ""
-        if self._agent_goal_reached[agent_addr]:
-            end_reason = "goal_reached"
-        elif self._timeout_reached(agent_addr):
-            end_reason = "max_steps"
-        else:
-            end_reason = "game_lost"
-            reward += self._world._rewards["detection"]
+        reward = self._agent_rewards[agent_addr]
+        end_reason = self._agent_statuses[agent_addr]
        new_observation = Observation(
            current_observation.state,
            reward=reward,
@@ -586,7 +613,7 @@ def goal_dict_satistfied(goal_dict:dict, known_dict: dict)-> bool:
                if len(matching_keys) == len(goal_dict.keys()):
                    return True
            except KeyError:
-                #some keys are missing in the known_dict
+                # some keys are missing in the known_dict
                return False
        return False
@@ -615,18 +642,58 @@ def _check_detection(self, agent_addr:tuple, last_action:Action)->bool:
            self.logger.info("\tNot detected!")
        return detection

-    def _timeout_reached(self, agent_addr:tuple) ->bool:
+    def _max_steps_reached(self, agent_addr:tuple) ->bool:
        """
        Checks if the agent reached the max allowed steps. Only applies to role 'Attacker'
        """
        self.logger.debug(f"Checking timout for {self.agents[agent_addr]}")
-        if self.agents[agent_addr][1] == "Attacker":
-            if self._agent_steps[agent_addr] >= self._steps_limit:
+        agent_role = self.agents[agent_addr][1]
+        if self._steps_limit_per_role[agent_role]:
+            if self._agent_steps[agent_addr] >= self._steps_limit_per_role[agent_role]:
                self.logger.info("Timeout reached by {self.agents[agent_addr]}!")
                return True
        else:
+            self.logger.debug(f"No max steps defined for role {agent_role}")
            return False

+    def _assign_end_rewards(self)->None:
+        """
+        Method which assigns rewards to each agent that has finished playing
+        """
+        is_episode_over = self.episode_end
+        for agent, status in self._agent_statuses.items():
+            if agent not in self._agent_rewards.keys(): # reward has not been assigned yet
+                agent_name, agent_role = self.agents[agent]
+                if agent_role == "Attacker":
+                    match status:
+                        case "goal_reached":
+                            self._agent_rewards[agent] = self._world._rewards["goal"]
+                        case "max_steps":
+                            self._agent_rewards[agent] = 0
+                        case "blocked":
+                            self._agent_rewards[agent] = self._world._rewards["detection"]
+                    self.logger.info(f"End reward for {agent_name}({agent_role}, status: '{status}') = {self._agent_rewards[agent]}")
+                elif agent_role == "Defender":
+                    if self._agent_statuses[agent] == "max_steps": # defender was responsible for the end
+                        raise NotImplementedError
+                        self._agent_rewards[agent] = 0
+                    else:
+                        if is_episode_over: # only assign defender's reward when the episode ends
+                            successful_attacks = list(self._agent_statuses.values()).count("goal_reached")
+                            if successful_attacks > 0:
+                                self._agent_rewards[agent] = successful_attacks*self._world._rewards["detection"]
+                                self._agent_statuses[agent] = "game_lost"
+                            else: # no successful attacker
+                                self._agent_rewards[agent] = self._world._rewards["goal"]
+                                self._agent_statuses[agent] = "goal_reached"
+ self.logger.info(f"End reward for {agent_name}({agent_role}, status: '{status}') = {self._agent_rewards[agent]}") + else: + if is_episode_over: + self._agent_rewards[agent] = 0 + self.logger.info(f"End reward for {agent_name}({agent_role}, status: '{status}') = {self._agent_rewards[agent]}") + + + __version__ = "v0.2.2" @@ -668,7 +735,7 @@ def _timeout_reached(self, agent_addr:tuple) ->bool: action="store", required=False, type=str, - default="INFO", + default="WARNING", ) args = parser.parse_args() diff --git a/docs/Coordinator.md b/docs/Coordinator.md index e8f65076..5bca3080 100644 --- a/docs/Coordinator.md +++ b/docs/Coordinator.md @@ -1,4 +1,43 @@ # Coordinator +Coordinator is the centerpiece of the game orchestration. It provides an interface between the agents and the AIDojo world. + +1. Registration of new agents in the game +2. Verification of agents' actionf format +3. Recording (and storing) trajectories of agents +4. Detection of episode ends (either by reaching timout or agents reaching their respective goals) +5. Assigning rewards for each action and at the end of each episode +6. Removing agents from the game +7. Registering the GameReset requests and handelling the game resets. + +## Main components of the coordinator +`self._actions_queue`: asycnio queue for agent -> aidojo_world communication +`self._answers_queue`: asycnio queue for aidojo_world -> agent communication +`self.ALLOWED_ROLES`: list of allowed agent roles [`Attacker`, `Defender`, `Benign`] +`self._world`: Instance of `AIDojoWorld`. Implements the dynamics of the world +`self._CONFIG_FILE_HASH`: hash of the configuration file used in the interaction (scenario, topology, etc.). Used for better reproducibility of results +`self._starting_positions_per_role`: dictionary of starting position of each agent type from `self.ALLOWED_ROLES` +`self._win_conditions_per_role`: dictionary of goal state for each agent type from `self.ALLOWED_ROLES` +`self._goal_description_per_role`: dictionary of textual description of goal of each agent type from `self.ALLOWED_ROLES` +`self._steps_limit_per_role`: dictionary of maximum allowed steps per episode for of each agent type from `self.ALLOWED_ROLES` +`self._use_global_defender`: Inditaction of presence of Global defender (deprecated) + +### Agent information components +`self.agents`: information about connected agents {`agent address`: (`agent_name`,`agent_role`)} +`self._agent_steps`: step counter for each agent in the current episode +`self._reset_requests`: dictionary where requests for episode reset are collected (the world resets only if ALL agents request reset) +`self._agent_observations`: current observation per agent +`self._agent_starting_position`: starting position (with wildcards, see [configuration](../README.md#task-configuration)) per agent +`self._agent_states`: current GameState per agent +`self._agent_statuses`: status of each agent. One of following options: + - `playing`: agent is registered and can participate in current episode. Can't influence the episode termination + - `playing_active`: agent is registered and can participate in current episode. It has `goal` and `max_steps` defined and can influence the termination of the episode + - `goal_reached`: agent has reached it's goal in this episode. It can't perform any more actions until the interaction is resetted. + - `blocked`: agent has been blocked. It can't perform any more actions until the interaction is resetted. + - `max_steps`: agent has reached it's maximum allowed steps. 
+
+
+`self._agent_rewards`: dictionary of the final reward of each agent in the current episode. Only agents which can no longer participate in the ongoing episode are listed.
+`self._agent_trajectories`: complete trajectories for each agent in the ongoing episode

## The format of the messages to the agents is
{
diff --git a/env/netsecenv_conf.yaml b/env/netsecenv_conf.yaml
index 45675b2e..e24a5424 100644
--- a/env/netsecenv_conf.yaml
+++ b/env/netsecenv_conf.yaml
@@ -4,6 +4,7 @@ coordinator:
  # Define the conf for all the 'attacker' agents.
  agents:
    Attacker:
+      max_steps: 50
      goal:
        description: "Exfiltrate data to '213.47.23.195'"
        is_any_part_of_goal_random: True
@@ -28,7 +29,6 @@ coordinator:
        known_blocks: {}
        # Example of known blocks. In the host 192.168.2.2, block all connections coming or going to 192.168.1.3
        # known_blocks: {192.168.2.2: {192.168.1.3}}
-
      start_position:
        known_networks: []
        known_hosts: []
@@ -99,7 +99,6 @@ env:
  # random_seed: 42
  scenario: 'scenario1'
  use_global_defender: False
-  max_steps: 50
  use_dynamic_addresses: False
  use_firewall: True
  save_trajectories: False
diff --git a/env/worlds/network_security_game.py b/env/worlds/network_security_game.py
index c2bd3532..b7dec48f 100755
--- a/env/worlds/network_security_game.py
+++ b/env/worlds/network_security_game.py
@@ -45,9 +45,6 @@ def __init__(self, task_config_file, world_name="NetSecEnv") -> None:
        self._seed = seed
        self.logger.info(f'Setting env seed to {seed}')

-        # Set maximum number of steps in one episode
-        self._max_steps = self.task_config.get_max_steps()
-        self.logger.info(f"\tSetting max steps to {self._max_steps}")

        # Set rewards for goal/detection/step
        self._rewards = {
diff --git a/tests/manual/three_nets/test_three_net_scenario.py b/tests/manual/three_nets/test_three_net_scenario.py
index 34a067a9..eff14236 100644
--- a/tests/manual/three_nets/test_three_net_scenario.py
+++ b/tests/manual/three_nets/test_three_net_scenario.py
@@ -11,7 +11,7 @@
if __name__ == "__main__":

    # !!! RUN THE COORDINATOR IN SEPARATE PROCESS AS FOLLOWS !!!
-    # python3 coordinator.py --task_config=./tests/manual/three_nets/three_net_testing_conf.yaml
+    # python3 coordinator.py --task_config=./tests/manual/three_nets/three_net_testing_conf.yaml -l DEBUG
    NSE_config = "./three_net_testing_conf.yaml"
    coordinator_conf = "../../../coordinator.conf"
    log_filename = "three_net_test.log"
diff --git a/tests/manual/three_nets/three_net_testing_conf.yaml b/tests/manual/three_nets/three_net_testing_conf.yaml
index 93414c96..8829b4b5 100644
--- a/tests/manual/three_nets/three_net_testing_conf.yaml
+++ b/tests/manual/three_nets/three_net_testing_conf.yaml
@@ -9,6 +9,7 @@ coordinator:
  # Define the conf for all the 'attacker' agents.
  agents:
    Attacker:
+      max_steps: 15
      goal:
        description: "Exfiltrate password DB data to '213.47.23.195'"
        is_any_part_of_goal_random: True
@@ -58,7 +59,6 @@ env:
  # random_seed: 42
  scenario: 'three_nets'
  use_global_defender: False
-  max_steps: 15
  use_dynamic_addresses: False
  use_firewall: True
  save_trajectories: False
diff --git a/tests/netsecenv-task-for-testing.yaml b/tests/netsecenv-task-for-testing.yaml
index a7883b12..6fa91775 100644
--- a/tests/netsecenv-task-for-testing.yaml
+++ b/tests/netsecenv-task-for-testing.yaml
@@ -5,6 +5,7 @@ coordinator:
  # Define the conf for all the 'attacker' agents.
  agents:
    Attacker:
+      max_steps: 15
      goal:
        description: "Exfiltrate DatabaseData to '213.47.23.195'"
        # Put randomize_goal_every_episode in True if you put any of the goal values in 'random'
@@ -53,7 +54,7 @@ coordinator:
        #known_data: {213.47.23.195: [User1,DataFromServer1], 192.168.3.1: [User3,Data3FromServer3]}
        # Example to ask a random data in a specific server. Putting 'random' in the data, forces the env to randomly choose where the goal data is
        # known_data: {213.47.23.195: [random]}
-        known_blocks: {'all_routers': 'all_attackers'}
+        known_blocks: {}
        # Example of known blocks. In the host 192.168.2.2, block all connections coming or going to 192.168.1.3
        # known_blocks: {192.168.2.2: {192.168.1.3}}
        # You can also use the wildcard string 'all_routers', and 'all_attackers', to mean that all the controlled hosts of all the attackers should be in this list in order to win
@@ -83,7 +84,6 @@ env:
  # Or you can fix the seed
  # random_seed: 42
  scenario: 'scenario1_small'
-  max_steps: 15
  store_replay_buffer: False
  use_dynamic_addresses: False
  goal_reward: 100
diff --git a/tests/test_coordinator.py b/tests/test_coordinator.py
index 2b15ebc8..6413a36c 100644
--- a/tests/test_coordinator.py
+++ b/tests/test_coordinator.py
@@ -53,8 +53,8 @@ def test_class_init(self):
        assert coord._agent_starting_position == {}
        assert coord._agent_observations == {}
        assert coord._agent_states == {}
-        assert coord._agent_goal_reached == {}
-        assert coord._agent_episode_ends == {}
+        assert coord._agent_rewards == {}
+        assert coord._agent_statuses == {}
        assert type(coord._actions_queue) is queue.Queue
        assert type(coord._answers_queue) is queue.Queue
@@ -69,7 +69,7 @@ def test_initialize_new_player(self, coordinator_init):
        assert coord.agents[agent_addr] == (agent_name, agent_role)
        assert coord._agent_steps[agent_addr] == 0
        assert not coord._reset_requests[agent_addr]
-        assert not coord._agent_episode_ends[agent_addr]
+        assert coord._agent_statuses[agent_addr] == "playing_active"
        assert new_obs.reward == 0
        assert new_obs.end is False
@@ -219,5 +219,7 @@ def test_timeout(self, coordinator_registered_player):
        assert result["to_agent"] == ("192.168.1.1", "3300")
        assert result["status"] == "GameStatus.OK"
        assert init_result["observation"]["state"] != result["observation"]["state"]
+        assert coord._agent_steps[("192.168.1.1", "3300")] == 15
+        assert coord._agent_statuses[("192.168.1.1", "3300")] == "max_steps"
        assert result["observation"]["end"]
        assert result["observation"]["info"]["end_reason"] == "max_steps"
\ No newline at end of file
diff --git a/utils/utils.py b/utils/utils.py
index a2a8dc2a..7a53d276 100644
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -376,12 +376,20 @@ def get_win_conditions(self, agent_role):
                }
            case _:
                raise ValueError(f"Unsupported agent role: {agent_role}")
-    def get_max_steps(self):
+
+    def get_max_steps(self, role:str)->int:
        """
-        Get the max steps
+        Get the max steps based on the agent's role
        """
-        max_steps = self.config['env']['max_steps']
-        return int(max_steps)
+        try:
+            max_steps = int(self.config['coordinator']['agents'][role]["max_steps"])
+        except KeyError:
+            max_steps = None
+            self.logger.warning(f"Item 'max_steps' not found in 'coordinator.agents.{role}'! Setting value to default=None (no step limit)")
+        except TypeError as e:
+            max_steps = None
+            self.logger.warning(f"Unsupported value in 'coordinator.agents.{role}.max_steps': {e}. Setting value to default=None (no step limit)")
+        return max_steps

    def get_goal_description(self, agent_role)->dict:
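For reviewers, the status-based episode logic introduced above can be condensed into a short, self-contained sketch. This is illustrative only — the addresses, statuses, and reward numbers below are hypothetical stand-ins, not the coordinator's actual state — but it mirrors the new rule that an episode ends only when no agent with a configured `max_steps` is still `playing_active`, and the per-status end rewards that the Attacker branch of `_assign_end_rewards` hands out.

```python
# Minimal sketch of the status-based episode-end rule introduced by this patch.
# Agent addresses, statuses and reward values are made up for illustration.
REWARDS = {"goal": 100, "detection": -50, "step": -1}  # hypothetical reward table

def episode_end(statuses: dict) -> bool:
    # The episode ends only when no agent with a defined max_steps is still active.
    return not any(status == "playing_active" for status in statuses.values())

def attacker_end_reward(status: str) -> int:
    # Mirrors the Attacker branch of _assign_end_rewards().
    return {
        "goal_reached": REWARDS["goal"],
        "max_steps": 0,
        "blocked": REWARDS["detection"],
    }.get(status, 0)

agent_statuses = {
    ("192.168.1.1", "3300"): "goal_reached",  # attacker that finished
    ("192.168.1.2", "3300"): "playing",       # benign agent without max_steps, never blocks the end
}

if episode_end(agent_statuses):
    for addr, status in agent_statuses.items():
        print(addr, status, attacker_end_reward(status))
```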
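The relocated `max_steps` option is now resolved per role from `coordinator.agents.<Role>.max_steps`, with `None` (no step limit) as the fallback when the key is absent. A minimal sketch of that lookup, assuming a plain dictionary as a stand-in for the parsed task-configuration YAML (not the real task-config object), could look like this:

```python
# Sketch of the per-role max_steps lookup with the None fallback used in get_max_steps().
# The `config` dict is a hypothetical stand-in for the parsed task-configuration YAML.
config = {
    "coordinator": {
        "agents": {
            "Attacker": {"max_steps": 15, "goal": {}},
            "Defender": {},            # no max_steps -> falls back to None
        }
    }
}

def get_max_steps(config: dict, role: str):
    try:
        return int(config["coordinator"]["agents"][role]["max_steps"])
    except KeyError:
        # Missing role or missing max_steps key: no step limit for this role.
        return None

for role in ("Attacker", "Defender", "Benign"):
    print(role, get_max_steps(config, role))
```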