diff --git a/README.md b/README.md
index a93eb3a1..98a81329 100755
--- a/README.md
+++ b/README.md
@@ -134,8 +134,8 @@ env:
## Task configuration
The task configuration part (section `coordinator[agents]`) defines the starting and goal position of the attacker and the type of defender that is used.
-### Attacker configuration (`attackers`)
-Configuration of the attacking agents. Consists of two parts:
+### Attacker configuration (`Attacker`)
+Configuration of the attacking agents. Consists of three parts:
1. Goal definition (`goal`) which describes the `GameState` properties that must be fulfilled to award `goal_reward` to the attacker:
    - `known_networks:`(set)
    - `known_hosts`(set)
@@ -154,11 +154,14 @@ Configuration of the attacking agents. Consists of two parts:
    - `known_data`(dict)

    The initial network configuration must assign at least **one** controlled host to the attacker in the network. Any item in `controlled_hosts` is copied to `known_hosts`, so there is no need to include these in both sets. `known_networks` is also extended with a set of **all** networks accessible from the `controlled_hosts`
+3. Definition of the maximum allowed number of steps:
+    - `max_steps:`(int)

Example attacker configuration:
```YAML
agents:
  Attacker:
+    max_steps: 100
    goal:
      randomize_goal_every_episode: False
      known_networks: []
@@ -179,7 +182,7 @@ agents:
      known_data: {}
      known_blocks: {}
```
-### Defender configuration (`defenders`)
+### Defender configuration (`Defender`)
Currently, the defender **is** a separate agent.
If you want a defender in the game, you must connect a defender agent.
For playing without a defender, leave the section empty.
diff --git a/coordinator.py b/coordinator.py
index 5839be01..27ca1be8 100644
--- a/coordinator.py
+++ b/coordinator.py
@@ -188,7 +188,7 @@ def __init__(self, actions_queue, answers_queue, net_sec_config, allowed_roles,
        self._starting_positions_per_role = self._get_starting_position_per_role()
        self._win_conditions_per_role = self._get_win_condition_per_role()
        self._goal_description_per_role = self._get_goal_description_per_role()
-        self._steps_limit = self._world.task_config.get_max_steps()
+        self._steps_limit_per_role = self._get_max_steps_per_role()
        self._use_global_defender = self._world.task_config.get_use_global_defender()
        # player information
        self.agents = {}
@@ -201,18 +201,19 @@ def __init__(self, actions_queue, answers_queue, net_sec_config, allowed_roles,
        self._agent_starting_position = {}
        # current state per agent_addr (GameState)
        self._agent_states = {}
-        # goal reach status per agent_addr (bool)
-        self._agent_goal_reached = {}
-        self._agent_episode_ends = {}
-        self._agent_detected = {}
+        # agent status dict {agent_addr: string}
+        self._agent_statuses = {}
+        # agent end-reward dict {agent_addr: int}
+        self._agent_rewards = {}
        # trajectories per agent_addr
        self._agent_trajectories = {}

    @property
    def episode_end(self)->bool:
-        # Terminate episode if at least one player wins or reaches the timeout
-        self.logger.debug(f"End evaluation: {self._agent_episode_ends.values()}")
-        return all(self._agent_episode_ends.values())
+        # Episode ends ONLY IF all agents with defined max_steps reached the end of the episode
+        exists_active_player = any(status == "playing_active" for status in self._agent_statuses.values())
+        self.logger.debug(f"End evaluation: {self._agent_statuses.items()} - Episode end:{not exists_active_player}")
+        return not exists_active_player

    @property
    def config_file_hash(self):
@@ -273,8 +274,13 @@ async def run(self):
                        self._reset_requests[agent] = False
                        self._agent_steps[agent] = 0
                        self._agent_states[agent] = self._world.create_state_from_view(self._agent_starting_position[agent])
-                        self._agent_goal_reached[agent] = self._goal_reached(agent)
-                        self._agent_episode_ends[agent] = False
+                        self._agent_rewards.pop(agent, None)
+                        if self._steps_limit_per_role[self.agents[agent][1]]:
+                            # This agent can force the episode end (has timeout and goal defined)
+                            self._agent_statuses[agent] = "playing_active"
+                        else:
+                            # This agent can NOT force the episode end (does NOT have timeout or goal defined)
+                            self._agent_statuses[agent] = "playing"
                        output_message_dict = self._create_response_to_reset_game_action(agent)
                        msg_json = self.convert_msg_dict_to_json(output_message_dict)
                        # Send to anwer_queue
@@ -307,9 +313,13 @@ def _initialize_new_player(self, agent_addr:tuple, agent_name:str, agent_role:st
        self._reset_requests[agent_addr] = False
        self._agent_starting_position[agent_addr] = self._starting_positions_per_role[agent_role]
        self._agent_states[agent_addr] = self._world.create_state_from_view(self._agent_starting_position[agent_addr])
-        self._agent_goal_reached[agent_addr] = self._goal_reached(agent_addr)
-        self._agent_detected[agent_addr] = self._check_detection(agent_addr, None)
-        self._agent_episode_ends[agent_addr] = False
+
+        if self._steps_limit_per_role[agent_role]:
+            # This agent can force the episode end (has timeout and goal defined)
+            self._agent_statuses[agent_addr] = "playing_active"
+        else:
+            # This agent can NOT force the episode end (does NOT have timeout or goal defined)
+            self._agent_statuses[agent_addr] = "playing"
        if self._world.task_config.get_store_trajectories() or self._use_global_defender:
            self._agent_trajectories[agent_addr] = self._reset_trajectory(agent_addr)
        self.logger.info(f"\tAgent {agent_name} ({agent_addr}), registred as {agent_role}")
@@ -323,10 +333,10 @@ def _remove_player(self, agent_addr:tuple)->dict:
        agent_info = {}
        if agent_addr in self.agents:
            agent_info["state"] = self._agent_states.pop(agent_addr)
-            agent_info["goal_reached"] = self._agent_goal_reached.pop(agent_addr)
+            agent_info["status"] = self._agent_statuses.pop(agent_addr)
            agent_info["num_steps"] = self._agent_steps.pop(agent_addr)
            agent_info["reset_request"] = self._reset_requests.pop(agent_addr)
-            agent_info["episode_end"] = self._agent_episode_ends.pop(agent_addr)
+            agent_info["end_reward"] = self._agent_rewards.pop(agent_addr, None)
            agent_info["agent_info"] = self.agents.pop(agent_addr)
            self.logger.debug(f"\t{agent_info}")
        else:
@@ -376,6 +386,19 @@ def _get_goal_description_per_role(self)->dict:
            self.logger.info(f"Goal description for role '{agent_role}': {goal_descriptions[agent_role]}")
        return goal_descriptions

+    def _get_max_steps_per_role(self)->dict:
+        """
+        Method for finding the maximum number of steps per episode for each agent role in the game.
+ """ + max_steps = {} + for agent_role in self.ALLOWED_ROLES: + try: + max_steps[agent_role] = self._world.task_config.get_max_steps(agent_role) + except KeyError: + max_steps[agent_role] = None + self.logger.info(f"Max steps in episode for '{agent_role}': {max_steps[agent_role]}") + return max_steps + def _process_join_game_action(self, agent_addr: tuple, action: Action) -> dict: """ " Method for processing Action of type ActionType.JoinGame @@ -386,14 +409,13 @@ def _process_join_game_action(self, agent_addr: tuple, action: Action) -> dict: agent_role = action.parameters["agent_info"].role if agent_role in self.ALLOWED_ROLES: initial_observation = self._initialize_new_player(agent_addr, agent_name, agent_role) - max_steps = self._world._max_steps if agent_role == "Attacker" else None output_message_dict = { "to_agent": agent_addr, "status": str(GameStatus.CREATED), "observation": observation_as_dict(initial_observation), "message": { "message": f"Welcome {agent_name}, registred as {agent_role}", - "max_steps": max_steps, + "max_steps": self._steps_limit_per_role[agent_role], "goal_description": self._goal_description_per_role[agent_role], "num_actions": self._world.num_actions, "configuration_hash": self._CONFIG_FILE_HASH @@ -436,8 +458,9 @@ def _create_response_to_reset_game_action(self, agent_addr: tuple) -> dict: "observation": observation_as_dict(new_observation), "message": { "message": "Resetting Game and starting again.", - "max_steps": self._world._max_steps, - "goal_description": self._goal_description_per_role[self.agents[agent_addr][1]] + "max_steps": self._steps_limit_per_role[self.agents[agent_addr][1]], + "goal_description": self._goal_description_per_role[self.agents[agent_addr][1]], + "configuration_hash": self._CONFIG_FILE_HASH }, } return output_message_dict @@ -491,24 +514,34 @@ def _process_generic_action(self, agent_addr: tuple, action: Action) -> dict: current_state = self._agent_states[agent_addr] # Build new Observation for the agent self._agent_states[agent_addr] = self._world.step(current_state, action, agent_addr) - self._agent_goal_reached[agent_addr] = self._goal_reached(agent_addr) - - self._agent_detected[agent_addr] = self._check_detection(agent_addr, action) - + # check timout + if self._max_steps_reached(agent_addr): + self._agent_statuses[agent_addr] = "max_steps" + # check detection + if self._check_detection(agent_addr, action): + self._agent_statuses[agent_addr] = "blocked" + self._agent_detected[agent_addr] = True + # check goal + if self._goal_reached(agent_addr): + self._agent_statuses[agent_addr] = "goal_reached" + # add reward for taking a step reward = self._world._rewards["step"] + obs_info = {} end_reason = None - if self._agent_goal_reached[agent_addr]: - reward += self._world._rewards["goal"] - self._agent_episode_ends[agent_addr] = True + if self._agent_statuses[agent_addr] == "goal_reached": + self._assign_end_rewards() + reward += self._agent_rewards[agent_addr] end_reason = "goal_reached" obs_info = {'end_reason': "goal_reached"} - elif self._timeout_reached(agent_addr): - self._agent_episode_ends[agent_addr] = True + elif self._agent_statuses[agent_addr] == "max_steps": + self._assign_end_rewards() + reward += self._agent_rewards[agent_addr] obs_info = {"end_reason": "max_steps"} end_reason = "max_steps" - elif self._agent_detected[agent_addr]: - reward += self._world._rewards["detection"] + elif self._agent_statuses[agent_addr] == "blocked": + self._assign_end_rewards() + reward += self._agent_rewards[agent_addr] 
            self._agent_episode_ends[agent_addr] = True
            obs_info = {"end_reason": "max_steps"}
@@ -524,6 +557,7 @@
                "status": str(GameStatus.OK),
            }
        else:
+            self._assign_end_rewards()
            self.logger.error(f"{self.episode_end}, {self._agent_episode_ends}")
            output_message_dict = self._generate_episode_end_message(agent_addr)
        return output_message_dict
@@ -533,15 +567,8 @@ def _generate_episode_end_message(self, agent_addr:tuple)->dict:
        Method for generating response when agent attemps to make a step after episode ended.
        """
        current_observation = self._agent_observations[agent_addr]
-        reward = 0 # TODO
-        end_reason = ""
-        if self._agent_goal_reached[agent_addr]:
-            end_reason = "goal_reached"
-        elif self._timeout_reached(agent_addr):
-            end_reason = "max_steps"
-        else:
-            end_reason = "game_lost"
-            reward += self._world._rewards["detection"]
+        reward = self._agent_rewards[agent_addr]
+        end_reason = self._agent_statuses[agent_addr]
        new_observation = Observation(
            current_observation.state,
            reward=reward,
@@ -586,7 +613,7 @@ def goal_dict_satistfied(goal_dict:dict, known_dict: dict)-> bool:
                if len(matching_keys) == len(goal_dict.keys()):
                    return True
            except KeyError:
-                #some keys are missing in the known_dict
+                # some keys are missing in the known_dict
                return False
        return False
@@ -615,18 +642,58 @@ def _check_detection(self, agent_addr:tuple, last_action:Action)->bool:
            self.logger.info("\tNot detected!")
        return detection

-    def _timeout_reached(self, agent_addr:tuple) ->bool:
+    def _max_steps_reached(self, agent_addr:tuple) ->bool:
        """
        Checks if the agent reached the max allowed steps. Only applies to role 'Attacker'
        """
        self.logger.debug(f"Checking timout for {self.agents[agent_addr]}")
-        if self.agents[agent_addr][1] == "Attacker":
-            if self._agent_steps[agent_addr] >= self._steps_limit:
+        agent_role = self.agents[agent_addr][1]
+        if self._steps_limit_per_role[agent_role]:
+            if self._agent_steps[agent_addr] >= self._steps_limit_per_role[agent_role]:
                self.logger.info("Timeout reached by {self.agents[agent_addr]}!")
                return True
        else:
+            self.logger.debug(f"No max steps defined for role {agent_role}")
            return False

+    def _assign_end_rewards(self)->None:
+        """
+        Method which assigns rewards to each agent that has finished playing
+        """
+        is_episode_over = self.episode_end
+        for agent, status in self._agent_statuses.items():
+            if agent not in self._agent_rewards.keys(): # reward has not been assigned yet
+                agent_name, agent_role = self.agents[agent]
+                if agent_role == "Attacker":
+                    match status:
+                        case "goal_reached":
+                            self._agent_rewards[agent] = self._world._rewards["goal"]
+                        case "max_steps":
+                            self._agent_rewards[agent] = 0
+                        case "blocked":
+                            self._agent_rewards[agent] = self._world._rewards["detection"]
+                    self.logger.info(f"End reward for {agent_name}({agent_role}, status: '{status}') = {self._agent_rewards[agent]}")
+                elif agent_role == "Defender":
+                    if self._agent_statuses[agent] == "max_steps": # defender was responsible for the end
+                        raise NotImplementedError
+                        self._agent_rewards[agent] = 0
+                    else:
+                        if is_episode_over: # only assign defender's reward when the episode ends
+                            successful_attacks = list(self._agent_statuses.values()).count("goal_reached")
+                            if successful_attacks > 0:
+                                self._agent_rewards[agent] = successful_attacks*self._world._rewards["detection"]
+                                self._agent_statuses[agent] = "game_lost"
+                            else: # no successful attacker
+                                self._agent_rewards[agent] = self._world._rewards["goal"]
+                                self._agent_statuses[agent] = "goal_reached"
+ self.logger.info(f"End reward for {agent_name}({agent_role}, status: '{status}') = {self._agent_rewards[agent]}") + else: + if is_episode_over: + self._agent_rewards[agent] = 0 + self.logger.info(f"End reward for {agent_name}({agent_role}, status: '{status}') = {self._agent_rewards[agent]}") + + + __version__ = "v0.2.2" @@ -668,7 +735,7 @@ def _timeout_reached(self, agent_addr:tuple) ->bool: action="store", required=False, type=str, - default="INFO", + default="WARNING", ) args = parser.parse_args() diff --git a/docs/Coordinator.md b/docs/Coordinator.md index e8f65076..5bca3080 100644 --- a/docs/Coordinator.md +++ b/docs/Coordinator.md @@ -1,4 +1,43 @@ # Coordinator +Coordinator is the centerpiece of the game orchestration. It provides an interface between the agents and the AIDojo world. + +1. Registration of new agents in the game +2. Verification of agents' actionf format +3. Recording (and storing) trajectories of agents +4. Detection of episode ends (either by reaching timout or agents reaching their respective goals) +5. Assigning rewards for each action and at the end of each episode +6. Removing agents from the game +7. Registering the GameReset requests and handelling the game resets. + +## Main components of the coordinator +`self._actions_queue`: asycnio queue for agent -> aidojo_world communication +`self._answers_queue`: asycnio queue for aidojo_world -> agent communication +`self.ALLOWED_ROLES`: list of allowed agent roles [`Attacker`, `Defender`, `Benign`] +`self._world`: Instance of `AIDojoWorld`. Implements the dynamics of the world +`self._CONFIG_FILE_HASH`: hash of the configuration file used in the interaction (scenario, topology, etc.). Used for better reproducibility of results +`self._starting_positions_per_role`: dictionary of starting position of each agent type from `self.ALLOWED_ROLES` +`self._win_conditions_per_role`: dictionary of goal state for each agent type from `self.ALLOWED_ROLES` +`self._goal_description_per_role`: dictionary of textual description of goal of each agent type from `self.ALLOWED_ROLES` +`self._steps_limit_per_role`: dictionary of maximum allowed steps per episode for of each agent type from `self.ALLOWED_ROLES` +`self._use_global_defender`: Inditaction of presence of Global defender (deprecated) + +### Agent information components +`self.agents`: information about connected agents {`agent address`: (`agent_name`,`agent_role`)} +`self._agent_steps`: step counter for each agent in the current episode +`self._reset_requests`: dictionary where requests for episode reset are collected (the world resets only if ALL agents request reset) +`self._agent_observations`: current observation per agent +`self._agent_starting_position`: starting position (with wildcards, see [configuration](../README.md#task-configuration)) per agent +`self._agent_states`: current GameState per agent +`self._agent_statuses`: status of each agent. One of following options: + - `playing`: agent is registered and can participate in current episode. Can't influence the episode termination + - `playing_active`: agent is registered and can participate in current episode. It has `goal` and `max_steps` defined and can influence the termination of the episode + - `goal_reached`: agent has reached it's goal in this episode. It can't perform any more actions until the interaction is resetted. + - `blocked`: agent has been blocked. It can't perform any more actions until the interaction is resetted. + - `max_steps`: agent has reached it's maximum allowed steps. 
+
+
+`self._agent_rewards`: dictionary of the final reward of each agent in the current episode. Only agents which can no longer participate in the ongoing episode are listed.
+`self._agent_trajectories`: complete trajectories for each agent in the ongoing episode

## The format of the messages to the agents is
{
diff --git a/env/netsecenv_conf.yaml b/env/netsecenv_conf.yaml
index 45675b2e..e24a5424 100644
--- a/env/netsecenv_conf.yaml
+++ b/env/netsecenv_conf.yaml
@@ -4,6 +4,7 @@ coordinator:
  # Define the conf for all the 'attacker' agents.
  agents:
    Attacker:
+      max_steps: 50
      goal:
        description: "Exfiltrate data to '213.47.23.195'"
        is_any_part_of_goal_random: True
@@ -28,7 +29,6 @@ coordinator:
        known_blocks: {}
        # Example of known blocks. In the host 192.168.2.2, block all connections coming or going to 192.168.1.3
        # known_blocks: {192.168.2.2: {192.168.1.3}}
-
      start_position:
        known_networks: []
        known_hosts: []
@@ -99,7 +99,6 @@ env:
  # random_seed: 42
  scenario: 'scenario1'
  use_global_defender: False
-  max_steps: 50
  use_dynamic_addresses: False
  use_firewall: True
  save_trajectories: False
diff --git a/env/worlds/network_security_game.py b/env/worlds/network_security_game.py
index c2bd3532..b7dec48f 100755
--- a/env/worlds/network_security_game.py
+++ b/env/worlds/network_security_game.py
@@ -45,9 +45,6 @@ def __init__(self, task_config_file, world_name="NetSecEnv") -> None:
        self._seed = seed
        self.logger.info(f'Setting env seed to {seed}')

-        # Set maximum number of steps in one episode
-        self._max_steps = self.task_config.get_max_steps()
-        self.logger.info(f"\tSetting max steps to {self._max_steps}")

        # Set rewards for goal/detection/step
        self._rewards = {
diff --git a/tests/manual/three_nets/test_three_net_scenario.py b/tests/manual/three_nets/test_three_net_scenario.py
index 34a067a9..eff14236 100644
--- a/tests/manual/three_nets/test_three_net_scenario.py
+++ b/tests/manual/three_nets/test_three_net_scenario.py
@@ -11,7 +11,7 @@
if __name__ == "__main__":

    # !!! RUN THE COORDINATOR IN SEPARATE PROCESS AS FOLLOWS !!!
-    # python3 coordinator.py --task_config=./tests/manual/three_nets/three_net_testing_conf.yaml
+    # python3 coordinator.py --task_config=./tests/manual/three_nets/three_net_testing_conf.yaml -l DEBUG
    NSE_config = "./three_net_testing_conf.yaml"
    coordinator_conf = "../../../coordinator.conf"
    log_filename = "three_net_test.log"
diff --git a/tests/manual/three_nets/three_net_testing_conf.yaml b/tests/manual/three_nets/three_net_testing_conf.yaml
index 93414c96..8829b4b5 100644
--- a/tests/manual/three_nets/three_net_testing_conf.yaml
+++ b/tests/manual/three_nets/three_net_testing_conf.yaml
@@ -9,6 +9,7 @@ coordinator:
  # Define the conf for all the 'attacker' agents.
  agents:
    Attacker:
+      max_steps: 15
      goal:
        description: "Exfiltrate password DB data to '213.47.23.195'"
        is_any_part_of_goal_random: True
@@ -58,7 +59,6 @@ env:
  # random_seed: 42
  scenario: 'three_nets'
  use_global_defender: False
-  max_steps: 15
  use_dynamic_addresses: False
  use_firewall: True
  save_trajectories: False
diff --git a/tests/netsecenv-task-for-testing.yaml b/tests/netsecenv-task-for-testing.yaml
index a7883b12..6fa91775 100644
--- a/tests/netsecenv-task-for-testing.yaml
+++ b/tests/netsecenv-task-for-testing.yaml
@@ -5,6 +5,7 @@ coordinator:
  # Define the conf for all the 'attacker' agents.
  agents:
    Attacker:
+      max_steps: 15
      goal:
        description: "Exfiltrate DatabaseData to '213.47.23.195'"
        # Put randomize_goal_every_episode in True if you put any of the goal values in 'random'
@@ -53,7 +54,7 @@ coordinator:
        #known_data: {213.47.23.195: [User1,DataFromServer1], 192.168.3.1: [User3,Data3FromServer3]}
        # Example to ask a random data in a specific server. Putting 'random' in the data, forces the env to randomly choose where the goal data is
        # known_data: {213.47.23.195: [random]}
-        known_blocks: {'all_routers': 'all_attackers'}
+        known_blocks: {}
        # Example of known blocks. In the host 192.168.2.2, block all connections coming or going to 192.168.1.3
        # known_blocks: {192.168.2.2: {192.168.1.3}}
        # You can also use the wildcard string 'all_routers', and 'all_attackers', to mean that all the controlled hosts of all the attackers should be in this list in order to win
@@ -83,7 +84,6 @@ env:
  # Or you can fix the seed
  # random_seed: 42
  scenario: 'scenario1_small'
-  max_steps: 15
  store_replay_buffer: False
  use_dynamic_addresses: False
  goal_reward: 100
diff --git a/tests/test_coordinator.py b/tests/test_coordinator.py
index 2b15ebc8..6413a36c 100644
--- a/tests/test_coordinator.py
+++ b/tests/test_coordinator.py
@@ -53,8 +53,8 @@ def test_class_init(self):
        assert coord._agent_starting_position == {}
        assert coord._agent_observations == {}
        assert coord._agent_states == {}
-        assert coord._agent_goal_reached == {}
-        assert coord._agent_episode_ends == {}
+        assert coord._agent_rewards == {}
+        assert coord._agent_statuses == {}
        assert type(coord._actions_queue) is queue.Queue
        assert type(coord._answers_queue) is queue.Queue
@@ -69,7 +69,7 @@ def test_initialize_new_player(self, coordinator_init):
        assert coord.agents[agent_addr] == (agent_name, agent_role)
        assert coord._agent_steps[agent_addr] == 0
        assert not coord._reset_requests[agent_addr]
-        assert not coord._agent_episode_ends[agent_addr]
+        assert coord._agent_statuses[agent_addr] == "playing_active"
        assert new_obs.reward == 0
        assert new_obs.end is False
@@ -219,5 +219,7 @@ def test_timeout(self, coordinator_registered_player):
        assert result["to_agent"] == ("192.168.1.1", "3300")
        assert result["status"] == "GameStatus.OK"
        assert init_result["observation"]["state"] != result["observation"]["state"]
+        assert coord._agent_steps[("192.168.1.1", "3300")] == 15
+        assert coord._agent_statuses[("192.168.1.1", "3300")] == "max_steps"
        assert result["observation"]["end"]
        assert result["observation"]["info"]["end_reason"] == "max_steps"
\ No newline at end of file
diff --git a/utils/utils.py b/utils/utils.py
index a2a8dc2a..7a53d276 100644
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -376,12 +376,20 @@ def get_win_conditions(self, agent_role):
                }
            case _:
                raise ValueError(f"Unsupported agent role: {agent_role}")
-    def get_max_steps(self):
+
+    def get_max_steps(self, role:str)->int:
        """
-        Get the max steps
+        Get the max steps based on the agent's role
        """
-        max_steps = self.config['env']['max_steps']
-        return int(max_steps)
+        try:
+            max_steps = int(self.config['coordinator']['agents'][role]["max_steps"])
+        except KeyError:
+            max_steps = None
+            self.logger.warning(f"Item 'max_steps' not found in 'coordinator.agents.{role}'! Setting value to default=None (no step limit)")
+        except TypeError as e:
+            max_steps = None
+            self.logger.warning(f"Unsupported value in 'coordinator.agents.{role}.max_steps': {e}. Setting value to default=None (no step limit)")
+        return max_steps

    def get_goal_description(self, agent_role)->dict:
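For reviewers, the status-based episode logic introduced above can be condensed into a short, self-contained sketch. This is illustrative only — the addresses, statuses, and reward numbers below are hypothetical stand-ins, not the coordinator's actual state — but it mirrors the new rule that an episode ends only when no agent with a configured `max_steps` is still `playing_active`, and the per-status end rewards that the Attacker branch of `_assign_end_rewards` hands out.

```python
# Minimal sketch of the status-based episode-end rule introduced by this patch.
# Agent addresses, statuses and reward values are made up for illustration.
REWARDS = {"goal": 100, "detection": -50, "step": -1}  # hypothetical reward table

def episode_end(statuses: dict) -> bool:
    # The episode ends only when no agent with a defined max_steps is still active.
    return not any(status == "playing_active" for status in statuses.values())

def attacker_end_reward(status: str) -> int:
    # Mirrors the Attacker branch of _assign_end_rewards().
    return {
        "goal_reached": REWARDS["goal"],
        "max_steps": 0,
        "blocked": REWARDS["detection"],
    }.get(status, 0)

agent_statuses = {
    ("192.168.1.1", "3300"): "goal_reached",  # attacker that finished
    ("192.168.1.2", "3300"): "playing",       # benign agent without max_steps, never blocks the end
}

if episode_end(agent_statuses):
    for addr, status in agent_statuses.items():
        print(addr, status, attacker_end_reward(status))
```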
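The relocated `max_steps` option is now resolved per role from `coordinator.agents.<Role>.max_steps`, with `None` (no step limit) as the fallback when the key is absent. A minimal sketch of that lookup, assuming a plain dictionary as a stand-in for the parsed task-configuration YAML (not the real task-config object), could look like this:

```python
# Sketch of the per-role max_steps lookup with the None fallback used in get_max_steps().
# The `config` dict is a hypothetical stand-in for the parsed task-configuration YAML.
config = {
    "coordinator": {
        "agents": {
            "Attacker": {"max_steps": 15, "goal": {}},
            "Defender": {},            # no max_steps -> falls back to None
        }
    }
}

def get_max_steps(config: dict, role: str):
    try:
        return int(config["coordinator"]["agents"][role]["max_steps"])
    except KeyError:
        # Missing role or missing max_steps key: no step limit for this role.
        return None

for role in ("Attacker", "Defender", "Benign"):
    print(role, get_max_steps(config, role))
```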