Merge pull request #261 from stratosphereips/fix-global-defender

eldraco · web-flow · commit 4cc44c08711b · 2025-01-18T20:11:08.000+01:00
Fix-global-defender
diff --git a/README.md b/README.md
@@ -275,5 +275,25 @@ This will load and run the unit tests in the `tests` folder.
 ## Code adaptation for new configurations
 The code can be adapted to new configurations of games and for new agents. See [Agent repository](https://github.com/stratosphereips/NetSecGameAgents/tree/main) for more details.
 
+## Function calling diagram
+
+```
+_handle_world_responses
+    ├── _world_response_queue.get()  # Called continuously to get a response from the World Response Queue.
+    ├── _process_world_response      # Called to process the response from the world.
+    │       ├── _process_world_response_created  # Called if agent status is JoinRequested. Processes agent creation.
+    │       ├── _process_world_response_reset_done  # Called if agent status is ResetRequested. Processes agent reset.
+    │       ├── _remove_player  # Called if agent status is Quitting. Removes the agent from the world.
+    │       └── _process_world_response_step  # Called if agent status is Ready, Playing, or PlayingActive. Processes a game step.
+    ├── _answers_queues[agent_id].put()  # Called to place the processed response in the agent's answer queue.
+    └── asyncio.sleep()  # Called to yield control back to the event loop.
+
+_process_world_response
+    ├── _process_world_response_created  # Called if agent status is JoinRequested. Processes agent creation.
+    ├── _process_world_response_reset_done  # Called if agent status is ResetRequested. Processes agent reset.
+    ├── _remove_player  # Called if agent status is Quitting. Removes the agent from the world.
+    └── _process_world_response_step  # Called if agent status is Ready, Playing, or PlayingActive. Processes a game step.
+    ```
+
 ## About us
 This code was developed at the [Stratosphere Laboratory at the Czech Technical University in Prague](https://www.stratosphereips.org/).
diff --git a/coordinator.py b/coordinator.py
@@ -617,7 +617,9 @@ def _check_detection(self, agent_addr:tuple, last_action:Action)->bool:
         if last_action:
             if self._use_global_defender:
                 self.logger.warning("Global defender - ONLY use for backward compatibility!")
-                episode_actions = self._agent_trajectories[agent_addr]["actions"] if "actions" in self._agent_trajectories[agent_addr] else []
+                episode_actions = None
+                if (agent_addr in self._agent_trajectories and "trajectory" in self._agent_trajectories[agent_addr] and "actions" in self._agent_trajectories[agent_addr]["trajectory"]):
+                    episode_actions = self._agent_trajectories[agent_addr]["trajectory"]["actions"]
                 detection =  stochastic_with_threshold(last_action, episode_actions)
         if detection:
             self.logger.info("\tDetected!")
@@ -723,7 +725,7 @@ def _process_world_response(self, agent_addr:tuple, response:tuple)-> str:
                 self._remove_player(agent_addr)
             elif agent_status in [AgentStatus.Ready, AgentStatus.Playing, AgentStatus.PlayingActive]:
                 output_message_dict = self._process_world_response_step(agent_addr, game_status, agent_new_state)
-            elif agent_status in [AgentStatus.FinishedBlocked, AgentStatus.FinishedGameLost, AgentStatus.FinishedGoalReached, AgentStatus.FinishedMaxSteps]:
+            elif agent_status in [AgentStatus.FinishedBlocked, AgentStatus.FinishedGameLost, AgentStatus.FinishedGoalReached, AgentStatus.FinishedMaxSteps]: # This if does not make sense. Put together with the previous (sebas)
                 output_message_dict = self._process_world_response_step(agent_addr, game_status, agent_new_state)
             else:
                 self.logger.error(f"Unsupported value '{agent_status}'!")
@@ -804,7 +806,7 @@ def _process_world_response_step(self, agent_addr:tuple, game_status:GameStatus,
             if not self.episode_end:
                 # increase the action counter
                 self._agent_steps[agent_addr] += 1
-                self.logger.info(f"{agent_addr} steps: {self._agent_steps[agent_addr]}")
+                self.logger.info(f"Agent {agent_addr} did #steps: {self._agent_steps[agent_addr]}")
                 # register the new state
                 self._agent_states[agent_addr] = agent_new_state
                 # load the action which lead to the new state
diff --git a/env/global_defender.py b/env/global_defender.py
@@ -4,30 +4,35 @@
 from random import random
 
 
+# The probability of detecting an action is defined by the following dictionary
 DEFAULT_DETECTION_PROBS = {
     ActionType.ScanNetwork: 0.05,
     ActionType.FindServices: 0.075,
     ActionType.ExploitService: 0.1,
     ActionType.FindData: 0.025,
     ActionType.ExfiltrateData: 0.025,
-    ActionType.BlockIP:0
+    ActionType.BlockIP: 0.01
 }
 
-TW_RATIOS = {
+# Ratios of action types in the time window (TW) for each action type. The ratio should be higher than the defined value to trigger a detection check
+TW_TYPE_RATIOS_THRESHOLD = {
     ActionType.ScanNetwork: 0.25,
     ActionType.FindServices: 0.3,
     ActionType.ExploitService: 0.25,
     ActionType.FindData: 0.5,
     ActionType.ExfiltrateData: 0.25,
-    ActionType.BlockIP:1
+    ActionType.BlockIP: 1
 }
 
-CONSECUTIVE_THRESHOLD = {
+# Thresholds for consecutive actions of the same type in the TW. Only if the threshold is crossed, the detection check is triggered
+TW_CONSECUTIVE_TYPE_THRESHOLD = {
     ActionType.ScanNetwork: 2,
     ActionType.FindServices: 3,
-    ActionType.ExfiltrateData: 2,
+    ActionType.ExfiltrateData: 2
 }
-REPEATED_THRESHOLD = {
+
+# Thresholds for repeated actions in the episode. Only if the threshold is crossed, the detection check is triggered
+EPISODE_REPEATED_ACTION_THRESHOLD = {
     ActionType.ExploitService: 2,
     ActionType.FindData: 2,
 }
@@ -37,36 +42,39 @@ def stochastic(action_type:ActionType)->bool:
     Simple random detection based on predefied probability and ActionType
     """
     roll = random()
-    return roll < DEFAULT_DETECTION_PROBS[action_type]
+    if roll < DEFAULT_DETECTION_PROBS[action_type]:
+        return True
+    else:
+        return False
 
-def stochastic_with_threshold(action:Action, episode_actions:list, tw_size:int=5)-> bool:
+def stochastic_with_threshold(action: Action, episode_actions:list, tw_size:int=5)-> bool:
     """
     Only detect based on set probabilities if pre-defined thresholds are crossed.
     """
     # extend the episode with the latest action
-    episode_actions.append(action)
-    if len(episode_actions) >= tw_size:
-        last_n_actions = episode_actions[-tw_size:]
-        last_n_action_types = [action.type for action in last_n_actions]
+    # We need to copy the list before the copying, so we avoid modifying it when it is returned. Modifycation of passed list is the default behavior in Python
+    temp_episode_actions = episode_actions.copy()
+    temp_episode_actions.append(action.as_dict)
+    if len(temp_episode_actions) >= tw_size:
+        last_n_actions = temp_episode_actions[-tw_size:]
+        last_n_action_types = [action['type'] for action in last_n_actions]
         # compute ratio of action type in the TW
-        tw_ratio = last_n_action_types.count(action.type)/tw_size
+        tw_ratio = last_n_action_types.count(str(action.type))/tw_size
         # Count how many times this exact (parametrized) action was played in episode
-        repeats_in_episode = episode_actions.count(action)
-        # Compute Action type ration in the TW
-        tw_ratio = last_n_action_types.count(action.type)/tw_size
+        repeats_in_episode = temp_episode_actions.count(action.as_dict)
         # compute the highest consecutive number of action type in TW
-        max_consecutive_action_type= max(sum(1 for item in grouped if item == action.type)
+        max_consecutive_action_type = max(sum(1 for item in grouped if item == str(action.type))
                                             for _, grouped in groupby(last_n_action_types))
         
-        if action.type in CONSECUTIVE_THRESHOLD.keys():
+        if action.type in TW_CONSECUTIVE_TYPE_THRESHOLD.keys():
             # ScanNetwork, FindServices, ExfiltrateData
-            if tw_ratio < TW_RATIOS[action.type] and max_consecutive_action_type < CONSECUTIVE_THRESHOLD[action.type]:
+            if tw_ratio < TW_TYPE_RATIOS_THRESHOLD[action.type] and max_consecutive_action_type < TW_CONSECUTIVE_TYPE_THRESHOLD[action.type]:
                 return False
             else:
                 return stochastic(action.type)
-        elif action.type in REPEATED_THRESHOLD.keys():
+        elif action.type in EPISODE_REPEATED_ACTION_THRESHOLD.keys():
             # FindData, Exploit service
-            if tw_ratio < TW_RATIOS[action.type] and repeats_in_episode < REPEATED_THRESHOLD[action.type]:
+            if tw_ratio < TW_TYPE_RATIOS_THRESHOLD[action.type] and repeats_in_episode < EPISODE_REPEATED_ACTION_THRESHOLD[action.type]:
                 return False
             else:
                 return stochastic(action.type)
diff --git a/env/worlds/aidojo_world.py b/env/worlds/aidojo_world.py
@@ -57,6 +57,29 @@ def update_goal_dict(self, goal_dict:dict)->dict:
         raise NotImplementedError
 
     async def handle_incoming_action(self)->None:
+        """
+        Asynchronously handles incoming actions from agents and processes them accordingly.
+
+        This method continuously listens for actions from the `_action_queue`, processes them based on their type,
+        and sends the appropriate response to the `_response_queue`. It handles different types of actions such as
+        joining a game, quitting a game, and resetting the game. For other actions, it updates the game state by
+        calling the `step` method.
+
+        Raises:
+            asyncio.CancelledError: If the task is cancelled, it logs the termination message.
+
+        Action Types:
+            - ActionType.JoinGame: Creates a new game state and sends a CREATED status.
+            - ActionType.QuitGame: Sends an OK status with an empty game state.
+            - ActionType.ResetGame: Resets the world if the agent is "world", otherwise resets the game state and sends a RESET_DONE status.
+            - Other: Updates the game state using the `step` method and sends an OK status.
+
+        Logging:
+            - Logs the start of the task.
+            - Logs received actions and game states from agents.
+            - Logs the messages being sent to agents.
+            - Logs termination due to `asyncio.CancelledError`.
+        """
         try:
             self.logger.info(f"\tStaring {self.world_name} task.")
             while True:
@@ -77,7 +100,7 @@ async def handle_incoming_action(self)->None:
                         new_state = self.step(game_state, action,agent_id)
                         msg = (agent_id, (new_state, GameStatus.OK))
                 # new_state = self.step(state, action, agent_id)
-                self.logger.debug(f"Sending to{agent_id}: {msg}")
+                self.logger.debug(f"Sending to {agent_id}: {msg}")
                 await self._response_queue.put(msg)
                 await asyncio.sleep(0)
         except asyncio.CancelledError:
diff --git a/env/worlds/network_security_game.py b/env/worlds/network_security_game.py
@@ -326,6 +326,22 @@ def _create_new_network_mapping(self)->tuple:
                 new_self_networks[mapping_nets[net]].add(mapping_ips[ip])
         self._networks = new_self_networks
         
+        # Harpo says that here there is a problem that firewall.items() do not return an ip that can be used in the mapping
+        # His solution is: (check)
+        """
+        new_self_firewall = {}
+        for ip, dst_ips in self._firewall.items():
+            if ip not in mapping_ips:
+                self.logger.debug(f"IP {ip} not found in mapping_ips")
+                continue  # Skip this IP if it's not found in the mapping
+
+            new_self_firewall[mapping_ips[ip]] = set()
+           
+            for dst_ip in dst_ips:
+                new_self_firewall[mapping_ips[ip]].add(mapping_ips[dst_ip])
+        self._firewall = new_self_firewall
+        """
+
         #self._firewall
         new_self_firewall = {}
         for ip, dst_ips in self._firewall.items():