diff --git a/.github/workflows/deploy-docs.yml b/.github/workflows/deploy-docs.yml
new file mode 100644
index 00000000..e5d51fd7
--- /dev/null
+++ b/.github/workflows/deploy-docs.yml
@@ -0,0 +1,33 @@
+name: Deploy MkDocs site to GitHub Pages
+
+on:
+  push:
+    branches:
+      - main  
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v3
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.12' 
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install mkdocs-material mkdocstrings pymdown-extensions
+
+      - name: Build MkDocs site
+        run: mkdocs build --strict
+
+      - name: Deploy to GitHub Pages
+        uses: peaceiris/actions-gh-pages@v4
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          publish_dir: ./site
\ No newline at end of file
diff --git a/AIDojoCoordinator/coordinator.py b/AIDojoCoordinator/coordinator.py
index 0d474d37..a554b9ab 100644
--- a/AIDojoCoordinator/coordinator.py
+++ b/AIDojoCoordinator/coordinator.py
@@ -14,9 +14,24 @@
 
 class AgentServer(asyncio.Protocol):
     """
-    Class used for serving the agents when conneting to the game run by th GameCoordinator.
+    Class used for serving the agents when connecting to the game run by the GameCoordinator.
+
+    Attributes:
+        actions_queue (asyncio.Queue): Queue for actions from agents.
+        answers_queues (dict): Mapping of agent addresses to their response queues.
+        max_connections (int): Maximum allowed concurrent agent connections.
+        current_connections (int): Current number of connected agents.
+        logger (logging.Logger): Logger for the AgentServer.
     """
     def __init__(self, actions_queue, agent_response_queues, max_connections):
+        """
+        Initialize the AgentServer.
+
+        Args:
+            actions_queue (asyncio.Queue): Queue for actions from agents.
+            agent_response_queues (dict): Mapping of agent addresses to their response queues.
+            max_connections (int): Maximum allowed concurrent agent connections.
+        """
         self.actions_queue = actions_queue
         self.answers_queues = agent_response_queues
         self.max_connections = max_connections
@@ -26,6 +41,9 @@ def __init__(self, actions_queue, agent_response_queues, max_connections):
     async def handle_agent_quit(self, peername:tuple):
         """
         Helper function to handle agent disconnection.
+
+        Args:
+            peername (tuple): The address of the disconnecting agent.
         """
         # Send a quit message to the Coordinator
         self.logger.info(f"\tHandling agent quit for {peername}.")
@@ -35,6 +53,10 @@ async def handle_agent_quit(self, peername:tuple):
     async def handle_new_agent(self, reader, writer):
         """
         Handle a new agent connection.
+
+        Args:
+            reader (asyncio.StreamReader): Stream reader for the agent.
+            writer (asyncio.StreamWriter): Stream writer for the agent.
         """
         # get the peername of the writer
         peername = writer.get_extra_info("peername")
@@ -102,6 +124,13 @@ async def handle_new_agent(self, reader, writer):
                 # swallow exceptions on close to avoid crash on cleanup
                 pass
     async def __call__(self, reader, writer):
+        """
+        Allow the server instance to be called as a coroutine.
+
+        Args:
+            reader (asyncio.StreamReader): Stream reader for the agent.
+            writer (asyncio.StreamWriter): Stream writer for the agent.
+        """
         await self.handle_new_agent(reader, writer)
 
 class GameCoordinator:
diff --git a/AIDojoCoordinator/game_components.py b/AIDojoCoordinator/game_components.py
index a42c923e..fcde7614 100755
--- a/AIDojoCoordinator/game_components.py
+++ b/AIDojoCoordinator/game_components.py
@@ -15,7 +15,13 @@
 @dataclass(frozen=True, eq=True, order=True)
 class Service():
     """
-    Service represents the service object in the NetSecGame
+    Represents a service in the NetSecGame.
+
+    Attributes:
+        name (str): Name of the service.
+        type (str): Type of the service. Default `unknown`
+        version (str): Version of the service. Default `unknown`
+        is_local (bool): Whether the service is local. Default True
     """
     name: str
     type: str = "unknown"
@@ -23,39 +29,70 @@ class Service():
     is_local: bool = True
 
     @classmethod
-    def from_dict(cls, data: dict):
+    def from_dict(cls, data: dict)->"Service":
+        """
+        Create a Service object from a dictionary.
+
+        Args:
+            data (dict): Dictionary with service attributes.
+
+        Returns:
+            Service: The created Service object.
+        """
         return cls(**data)
 
-"""
-IP represents the ip address object in the NetSecGame
-"""
+
 @dataclass(frozen=True, eq=True, order=True)
 class IP():
     """
-    Receives one parameter ip that should be a string
+    Immutable object representing an IPv4 address in the NetSecGame.
+
+    Attributes:
+        ip (str): The IP address in dot-decimal notation.
     """
     ip: str
 
     def __post_init__(self):
         """
-        Check if the provided IP is valid
+        Verify if the provided IP is valid.
+
+        Raises:
+            ValueError: If the IP address is invalid.
         """
         try:
             ipaddress.ip_address(self.ip)
         except ValueError:
             raise ValueError(f"Invalid IP address provided: {self.ip}")
 
-    def __repr__(self):
+    def __repr__(self)->str:
+        """
+        Return the string representation of the IP.
+
+        Returns:
+            str: The IP address.
+        """
         return self.ip
 
-    def __eq__(self, other):
+    def __eq__(self, other)->bool:
+        """
+        Check equality with another IP object.
+
+        Args:
+            other (IP): Another IP object.
+
+        Returns:
+            is_equal: True if equal, False otherwise.
+        """
         if not isinstance(other, IP):
             return NotImplemented
         return self.ip == other.ip
         
-    def is_private(self):
+    def is_private(self)->bool:
         """
-        Return if the IP is private or not
+        Check if the IP address is private. Uses ipaddress module.
+
+        Returns:
+            is_private: True if the IP is private, False otherwise.
         """
         try:
             return ipaddress.IPv4Network(self.ip).is_private
@@ -65,48 +102,110 @@ def is_private(self):
             if self.ip != 'external':
                 return True
             return False
+    
     @classmethod
-    def from_dict(cls, data: dict):
+    def from_dict(cls, data: dict)->"IP":
+        """
+        Build the IP object from a dictionary representation.
+
+        Args:
+            data (dict): Dictionary with IP attributes.
+
+        Returns:
+            IP: The created IP object.
+        """
         return cls(**data)
     
-    def __hash__(self):
+    def __hash__(self)->int:
+        """
+        Compute the hash of the IP.
+
+        Returns:
+            hash: The hash value.
+        """
         return hash(self.ip)
 
 @dataclass(frozen=True, eq=True)
 class Network():
     """
-    Network represents the network object in the NetSecGame
+    Immutable object representing an IPv4 network in the NetSecGame.
+
+    Attributes:
+        ip (str): IP address of the network.
+        mask (int): CIDR mask of the network.
     """
     ip: str
     mask: int
 
-    def __repr__(self):
+    def __repr__(self)->str:
+        """
+        Return the string representation of the network.
+
+        Returns:
+            str: The network in CIDR notation.
+        """
         return f"{self.ip}/{self.mask}"
 
-    def __str__(self):
+    def __str__(self)->str:
+        """
+        Return the string representation of the network.
+
+        Returns:
+            str: The network in CIDR notation.
+        """
         return f"{self.ip}/{self.mask}"
 
-    def __lt__(self, other):
+    def __lt__(self, other)->bool:
+        """
+        Less-than comparison for networks.
+
+        Args:
+            other (Network): Another network.
+
+        Returns:
+            bool: True if self < other, False otherwise.
+        """
         try:
             return netaddr.IPNetwork(str(self)) < netaddr.IPNetwork(str(other))
         except netaddr.core.AddrFormatError:
             return str(self.ip) < str(other.ip)
     
-    def __le__(self, other):
+    def __le__(self, other)->bool:
+        """
+        Less-than-or-equal comparison for networks.
+
+        Args:
+            other (Network): Another network.
+
+        Returns:
+            bool: True if self <= other, False otherwise.
+        """
         try:
             return netaddr.IPNetwork(str(self)) <= netaddr.IPNetwork(str(other))
         except netaddr.core.AddrFormatError:
             return str(self.ip) <= str(other.ip)
     
-    def __gt__(self, other):
+    def __gt__(self, other)->bool:
+        """
+        Greater-than comparison for networks.
+
+        Args:
+            other (Network): Another network.
+
+        Returns:
+            bool: True if self > other, False otherwise.
+        """
         try:
             return netaddr.IPNetwork(str(self)) > netaddr.IPNetwork(str(other))
         except netaddr.core.AddrFormatError:
             return str(self.ip) > str(other.ip)
     
-    def is_private(self):
+    def is_private(self)->bool:
         """
-        Return if a network is private or not
+        Check if the network is private. Uses ipaddress module.
+
+        Returns:
+            bool: True if the network is private, False otherwise.
         """
         try:
             return ipaddress.IPv4Network(f'{self.ip}/{self.mask}',strict=False).is_private
@@ -115,18 +214,29 @@ def is_private(self):
             return True
     
     @classmethod
-    def from_dict(cls, data: dict):
+    def from_dict(cls, data: dict)->"Network":
+        """
+        Build the Network object from a dictionary.
+
+        Args:
+            data (dict): Dictionary with network attributes.
+
+        Returns:
+            Network: The created Network object.
+        """
         return cls(**data)
 
-"""
-Data represents the data object in the NetSecGame
-"""
 @dataclass(frozen=True, eq=True, order=True)
 class Data():
     """
-    Class to define dta
-    owner is the 'user' owner
-    id is the string of the data
+    Represents a data object in the NetSecGame.
+
+    Attributes:
+        owner (str): Owner of the data. 
+        id (str): Identifier of the data.
+        size (int): Size of the data. Default = 0
+        type (str): Type of the data. Default = ""
+        content (str): Content of the data. Default = ""
     """
     owner: str
     id: str
@@ -135,13 +245,31 @@ class Data():
     content: str = field(compare=False, hash=False, repr=False, default_factory=str)
 
     def __hash__(self) -> int:
+        """
+        Compute the hash of the Data object.
+
+        Returns:
+            int: The hash value.
+        """
         return hash((self.owner, self.id, self.type))
     @classmethod
-    def from_dict(cls, data: dict):
+    def from_dict(cls, data: dict)->"Data":
+        """
+        Build the Data object from a dictionary.
+
+        Args:
+            data (dict): Dictionary with data attributes.
+
+        Returns:
+            Data: The created Data object.
+        """
         return cls(**data)
 
 @enum.unique
 class ActionType(enum.Enum):
+    """
+    Enum representing possible action types in the NetSecGame.
+    """
     ScanNetwork = "ScanNetwork"
     FindServices = "FindServices"
     FindData = "FindData"
@@ -152,11 +280,25 @@ class ActionType(enum.Enum):
     QuitGame = "QuitGame"
     ResetGame = "ResetGame"
 
-    def to_string(self):
-        """Convert enum to string."""
+    def to_string(self)->str:
+        """
+        Convert the ActionType enum to string.
+
+        Returns:
+            str: The string representation.
+        """
         return self.value
     
-    def __eq__(self, other):
+    def __eq__(self, other)->bool:
+        """
+        Compare ActionType with another ActionType or string.
+
+        Args:
+            other (ActionType or str): The object to compare.
+
+        Returns:
+            bool: True if equal, False otherwise.
+        """
         # Compare with another ActionType
         if isinstance(other, ActionType):
             return self.value == other.value
@@ -165,13 +307,30 @@ def __eq__(self, other):
            return self.value == other.replace("ActionType.", "")
         return False
 
-    def __hash__(self):
+    def __hash__(self)->int:
+        """
+        Compute the hash of the ActionType.
+
+        Returns:
+            int: The hash value.
+        """
         # Use the hash of the value for consistent behavior
         return hash(self.value)
 
     @classmethod
-    def from_string(cls, name):
-        """Convert string to enum, stripping 'ActionType.' if present."""
+    def from_string(cls, name)->"ActionType":
+        """
+        Convert a string to an ActionType enum. Strips 'ActionType.' if present.
+
+        Args:
+            name (str): The string representation.
+
+        Returns:
+            ActionType: The corresponding ActionType.
+
+        Raises:
+            ValueError: If the string does not match any ActionType.
+        """
         if name.startswith("ActionType."):
             name = name.split("ActionType.")[1]
         try:
@@ -182,30 +341,58 @@ def from_string(cls, name):
 @dataclass(frozen=True, eq=True, order=True)
 class AgentInfo():
     """
-    Receives one parameter ip that should be a string
+    Represents agent information.
+
+    Attributes:
+        name (str): Name of the agent.
+        role (str): Role of the agent.
     """
     name: str
     role: str
 
-    def __repr__(self):
+    def __repr__(self)->str:
+        """
+        Return the string representation of the AgentInfo.
+
+        Returns:
+            str: The agent info as a string.
+        """
         return f"{self.name}({self.role})"
 
 
     @classmethod
-    def from_dict(cls, data: dict):
+    def from_dict(cls, data: dict)->"AgentInfo":
+        """
+        Build the AgentInfo object from a dictionary.
+
+        Args:
+            data (dict): Dictionary with agent info attributes.
+
+        Returns:
+            AgentInfo: The created AgentInfo object.
+        """
         return cls(**data)
 
 @dataclass(frozen=True)
 class Action:
     """
     Immutable dataclass representing an Action.
+
+    Attributes:
+        action_type (ActionType): The type of action.
+        parameters (Dict[str, Any]): Parameters for the action.
     """
     action_type: ActionType
     parameters: Dict[str, Any] = field(default_factory=dict)
 
     @property
     def as_dict(self) -> Dict[str, Any]:
-        """Return a dictionary representation of the Action."""
+        """
+        Return a dictionary representation of the Action.
+
+        Returns:
+            Dict[str, Any]: The action as a dictionary.
+        """
         params = {}
         for k, v in self.parameters.items():
             if hasattr(v, '__dict__'):  # Handle custom objects like Service, Data, AgentInfo
@@ -215,16 +402,38 @@ def as_dict(self) -> Dict[str, Any]:
         return {"action_type": str(self.action_type), "parameters": params}
     
     @property
-    def type(self):
-        return self.action_type
+    def type(self)->ActionType:
+        """
+        Return the action type.
 
+        Returns:
+            ActionType: The action type.
+        """
+        return self.action_type
+    
     def to_json(self) -> str:
-        """Serialize the Action to a JSON string."""
+        """
+        Serialize the Action to a JSON string.
+
+        Returns:
+            str: The JSON string representation.
+        """
         return json.dumps(self.as_dict)
 
     @classmethod
     def from_dict(cls, data_dict: Dict[str, Any]) -> "Action":
-        """Create an Action from a dictionary."""
+        """
+        Create an Action from a dictionary.
+
+        Args:
+            data_dict (Dict[str, Any]): The action as a dictionary.
+
+        Returns:
+            Action: The created Action object.
+
+        Raises:
+            ValueError: If an unsupported parameter is encountered.
+        """
         action_type = ActionType.from_string(data_dict["action_type"])
         params = {}
         for k, v in data_dict["parameters"].items():
@@ -247,17 +456,46 @@ def from_dict(cls, data_dict: Dict[str, Any]) -> "Action":
 
     @classmethod
     def from_json(cls, json_string: str) -> "Action":
-        """Create an Action from a JSON string."""
+        """
+        Create an Action from a JSON string.
+
+        Args:
+            json_string (str): The JSON string representation.
+
+        Returns:
+            Action: The created Action object.
+        """
         data_dict = json.loads(json_string)
         return cls.from_dict(data_dict)
 
     def __repr__(self) -> str:
+        """
+        Return the string representation of the Action.
+
+        Returns:
+            str: The action as a string.
+        """
         return f"Action <{self.action_type}|{self.parameters}>"
 
     def __str__(self) -> str:
+        """
+        Return the string representation of the Action.
+
+        Returns:
+            str: The action as a string.
+        """
         return f"Action <{self.action_type}|{self.parameters}>"
 
     def __eq__(self, other: object) -> bool:
+        """
+        Check equality with another Action object.
+
+        Args:
+            other (object): Another Action object.
+
+        Returns:
+            bool: True if equal, False otherwise.
+        """
         if not isinstance(other, Action):
             return NotImplemented
         return (
@@ -266,6 +504,12 @@ def __eq__(self, other: object) -> bool:
         )
     
     def __hash__(self) -> int:
+        """
+        Compute the hash of the Action.
+
+        Returns:
+            int: The hash value.
+        """
         # Convert parameters to a sorted tuple of key-value pairs for consistency
         sorted_params = tuple(sorted((k, hash(v)) for k, v in self.parameters.items()))
         return hash((self.action_type, sorted_params))
@@ -273,7 +517,15 @@ def __hash__(self) -> int:
 @dataclass(frozen=True)
 class GameState():
     """
-    Game state represents the states in the game state space.
+    Represents the state of the game.
+
+    Attributes:
+        controlled_hosts (set): Controlled hosts.
+        known_hosts (set): Known hosts.
+        known_services (dict): Known services.
+        known_data (dict): Known data.
+        known_networks (set): Known networks.
+        known_blocks (dict): Known blocks.
     """
     controlled_hosts: set = field(default_factory=set, hash=True)
     known_hosts: set = field(default_factory=set, hash=True)
@@ -283,7 +535,13 @@ class GameState():
     known_blocks: dict = field(default_factory=dict, hash=True)
     
     @property
-    def as_graph(self):
+    def as_graph(self)->tuple:
+        """
+        Build a graph representation of the game state.
+
+        Returns:
+            tuple: (node_features, controlled, edges, node_index_map)
+        """
         node_types = {"network":0, "host":1, "service":2, "datapoint":3, "blocks": 4}
         graph_nodes = {}
         node_features = []
@@ -345,11 +603,20 @@ def as_graph(self):
         return node_features, controlled, edges, {v:k for k, v in graph_nodes.items()}
 
     def __str__(self) -> str:
+        """
+        Return the string representation of the GameState.
+
+        Returns:
+            str: The game state as a string.
+        """
         return f"State<nets:{self.known_networks}; known:{self.known_hosts}; owned:{self.controlled_hosts}; services:{self.known_services}; data:{self.known_data}; blocks:{self.known_blocks}>"    
 
     def as_json(self) -> str:
         """
-        Returns json representation of the GameState in string
+        Return the JSON representation of the GameState.
+
+        Returns:
+            str: The JSON string.
         """
         ret_dict = self.as_dict
         return json.dumps(ret_dict)
@@ -357,7 +624,10 @@ def as_json(self) -> str:
     @property
     def as_dict(self)->dict:
         """
-        Returns dict representation of the GameState in string
+        Return the dictionary representation of the GameState.
+
+        Returns:
+            dict: The game state as a dictionary.
         """
         ret_dict = {"known_networks":[dataclasses.asdict(x) for x in self.known_networks],
             "known_hosts":[dataclasses.asdict(x) for x in self.known_hosts],
@@ -369,7 +639,16 @@ def as_dict(self)->dict:
         return ret_dict
 
     @classmethod
-    def from_dict(cls, data_dict:dict):
+    def from_dict(cls, data_dict:dict)->"GameState":
+        """
+        Create a GameState from a dictionary.
+
+        Args:
+            data_dict (dict): The game state as a dictionary.
+
+        Returns:
+            GameState: The created GameState object.
+        """
         if "known_blocks" in data_dict:
             known_blocks = {IP(target_host):{IP(blocked_host["ip"]) for blocked_host in blocked_hosts} for target_host, blocked_hosts in data_dict["known_blocks"].items()}
         else:
@@ -386,9 +665,15 @@ def from_dict(cls, data_dict:dict):
         return state
 
     @classmethod
-    def from_json(cls, json_string):
+    def from_json(cls, json_string)->"GameState":
         """
-        Creates GameState object from json representation in string
+        Create a GameState from a JSON string.
+
+        Args:
+            json_string (str): The JSON string.
+
+        Returns:
+            GameState: The created GameState object.
         """
         json_data = json.loads(json_string)
         state = GameState(
@@ -416,6 +701,9 @@ def from_json(cls, json_string):
 
 @enum.unique
 class GameStatus(enum.Enum):
+    """
+    Enum representing possible game statuses.
+    """
     OK = 200
 
     CREATED = 201
@@ -424,7 +712,16 @@ class GameStatus(enum.Enum):
     FORBIDDEN = 403
     
     @classmethod
-    def from_string(cls, string:str):
+    def from_string(cls, string:str)->"GameStatus":
+        """
+        Convert a string to a GameStatus enum.
+
+        Args:
+            string (str): The string representation.
+
+        Returns:
+            GameStatus: The corresponding GameStatus.
+        """
         match string:
             case "GameStatus.OK":
                 return GameStatus.OK
@@ -437,11 +734,20 @@ def from_string(cls, string:str):
             case "GameStatus.RESET_DONE":
                 return GameStatus.RESET_DONE
     def __repr__(self) -> str:
+        """
+        Return the string representation of the GameStatus.
+
+        Returns:
+            str: The game status as a string.
+        """
         return str(self)
 
 
 @enum.unique
 class AgentStatus(enum.Enum):
+    """
+    Enum representing possible agent statuses.
+    """
     Playing = "Playing"
     PlayingWithTimeout = "PlayingWithTimeout"
     TimeoutReached = "TimeoutReached"
@@ -449,11 +755,25 @@ class AgentStatus(enum.Enum):
     Success = "Success"
     Fail = "Fail"
     
-    def to_string(self):
-        """Convert enum to string."""
+    def to_string(self)->str:
+        """
+        Convert the AgentStatus enum to string.
+
+        Returns:
+            str: The string representation.
+        """
         return self.value
     
-    def __eq__(self, other):
+    def __eq__(self, other)->bool:
+        """
+        Compare AgentStatus with another AgentStatus or string.
+
+        Args:
+            other (AgentStatus or str): The object to compare.
+
+        Returns:
+            bool: True if equal, False otherwise.
+        """
         # Compare with another ActionType
         if isinstance(other, AgentStatus):
             return self.value == other.value
@@ -462,13 +782,30 @@ def __eq__(self, other):
            return self.value == other.replace("AgentStatus.", "")
         return False
 
-    def __hash__(self):
+    def __hash__(self)->int:
+        """
+        Compute the hash of the AgentStatus.
+
+        Returns:
+            int: The hash value.
+        """
         # Use the hash of the value for consistent behavior
         return hash(self.value)
 
     @classmethod
-    def from_string(cls, name):
-        """Convert string to enum, stripping 'AgentStatus.' if present."""
+    def from_string(cls, name)->"AgentStatus":
+        """
+        Convert a string to an AgentStatus enum.
+
+        Args:
+            name (str): The string representation.
+
+        Returns:
+            AgentStatus: The corresponding AgentStatus.
+
+        Raises:
+            ValueError: If the string does not match any AgentStatus.
+        """
         if name.startswith("AgentStatus."):
             name = name.split("AgentStatus.")[1]
         try:
@@ -478,5 +815,12 @@ def from_string(cls, name):
 
 @dataclass(frozen=True)
 class ProtocolConfig:
+    """
+    Configuration for protocol constants.
+
+    Attributes:
+        END_OF_MESSAGE (bytes): End-of-message marker.
+        BUFFER_SIZE (int): Buffer size for messages.
+    """
     END_OF_MESSAGE = b"EOF"
     BUFFER_SIZE = 8192 
\ No newline at end of file
diff --git a/NetSecGameAgents b/NetSecGameAgents
index 3a46c12d..6c27fa4d 160000
--- a/NetSecGameAgents
+++ b/NetSecGameAgents
@@ -1 +1 @@
-Subproject commit 3a46c12d64334303210cdf6b373177febe642079
+Subproject commit 6c27fa4d84bee0841b90a728fd86d996a2d17c88
diff --git a/docs/architecture.md b/docs/architecture.md
new file mode 100644
index 00000000..30b18bf9
--- /dev/null
+++ b/docs/architecture.md
@@ -0,0 +1,110 @@
+## NetSecGame Architecture
+The Network Security Game (NSG) works as a game server - agents connect to it via TCP sockets and interact with the environment using the standard RL communication loop: the agent submits an action and receives a new observation of the environment. The NSG supports real-time, highly customizable multi-agent simulations.
+
+## Game Components
+The following classes are used in the game to hold information about the state of the game. They are used both in the [Actions](#actions) and [GameState](#gamestate). See the API Reference for [GameComponents](game_components.md)
+### Building blocks
+#### IP
+IP is an immutable object that represents an IPv4 address in the NetSecGame. It has a single parameter: the address in dot-decimal notation (4 octets represented as decimal values separated by dots).
+
+Example: 
+```python
+ ip = IP("192.168.1.1")
+```
+
+#### Network
+Network is an immutable object that represents an IPv4 network in the NetSecGame. It has 2 parameters:
+- `ip:str` representing the IPv4 address of the network.
+- `mask:int` representing the mask in the CIDR notation.
+
+Example: 
+```python
+net = Network("192.168.1.0", 24)
+```
+#### Service
+Service class holds information about services running in hosts. Each Service has four parameters:
+- `name`:str  - Name of the service (e.g., "SSH")
+- `type`:str - `passive` or `active`. Currently not being used.
+- `version`:str - version of the service.
+- `is_local`:bool - flag specifying if the service is local only. (if `True`, service is NOT visible without controlling the host).
+
+Example: 
+```python
+s = Service('postgresql', 'passive', '14.3.0', False)
+```
+
+#### Data
+Data class holds information about datapoints (files) present in the NetSecGame.
+Each data instance has five parameters (two required, three optional):
+- `owner`:str - specifying the user who owns this datapoint
+- `id`: str - unique identifier of the datapoint in a host
+- `size`: int - size of the datapoint (optional, default=0)
+- `type`: str - identification of a type of the file (optional, default="")
+- `content`: str - content of the data (optional, default="")
+
+Examples:
+```python
+d1 = Data("User1", "DatabaseData")
+d2 = Data("User1", "DatabaseData", size=42, type="txt", content="SecretUserDatabase")
+```
+
+### GameState
+GameState is an object that represents a view of the NetSecGame environment in a given state. It is constructed as a collection of 'assets' available to the agent. GameState has following parts:
+- `known_networks`: Set of [Network](#network) objects that the agent is aware of
+- `known_hosts`: Set of [IP](#ip) objects that the agent is aware of
+- `controlled_hosts`: Set of [IP](#ip) objects that the agent has control over. Note that `controlled_hosts` is a subset of `known_hosts`.
+- `known_services`: Dictionary of services that the agent is aware of.
+The dictionary format: {`IP`: {`Service`}} where [IP](#ip) object is a key and the value is a set of [Service](#service) objects located in the `IP`.
+- `known_data`: Dictionary of data instances that the agent is aware of. The dictionary format: {`IP`: {`Data`}} where [IP](#ip) object is a key and the value is a set of [Data](#data) objects located in the `IP`.
+- `known_blocks`: Dictionary of firewall blocks the agent is aware of. It is a dictionary with format: {`target_IP`: {`blocked_IP`, `blocked_IP`}}. Where `target_IP` is the [IP](#ip) where the FW rule was applied (usually a router) and `blocked_IP` is the IP address that is blocked. For now the blocks happen in both input and output direction simultaneously.
+
+
+### Actions
+Actions are the objects sent by the agents to the environment. Each action is evaluated by NetSecGame and executed if
+1. It is a valid Action
+2. Can be processed in the current state of the environment
+
+In all cases, when an agent sends an action to NetSecGame, it is given a response.
+
+#### Action format
+The Action consists of two parts
+1. ActionType - specifying the class of the action
+2. parameters - dictionary with specific parameters related to the used ActionType
+
+#### List of ActionTypes
+- **JoinGame**, params={`agent_info`:AgentInfo(`<name>`, `<role>`)}: Used to register agent in a game with a given `<role>`.
+- **QuitGame**, params={}: Used for termination of agent's interaction.
+- **ResetGame**, params={`request_trajectory`:`bool`}: Used for requesting reset of the game to its initial position. If `request_trajectory = True`, the coordinator will send back the complete trajectory of the previous run in the next message.
+---
+- **ScanNetwork**, params={`source_host`:`<IP>`, `target_network`:`<Network>`}: Scans the given `<Network>` from a specified source host. Discovers ALL hosts in a network that are accessible from `<IP>`. If successful, returns set of discovered `<IP>` objects.
+- **FindServices**, params={`source_host`:`<IP>`, `target_host`:`<IP>`}: Used to discover ALL services running in the `target_host` if the host is accessible from `source_host`. If successful, returns a set of all discovered `<Service>` objects.
+- **FindData**, params={`source_host`:`<IP>`, `target_host`:`<IP>`}: Searches `target_host` for data. If `source_host` differs from `target_host`, success depends on accessibility from the `source_host`. If successful, returns a set of all discovered `<Data>` objects.
+- **ExploitService**, params={`source_host`:`<IP>`, `target_host`:`<IP>`, `target_service`:`<Service>`}: Exploits `target_service` in a specified `target_host`. If successful, the attacker gains control of the `target_host`.
+- **ExfiltrateData**, params={`source_host`:`<IP>`, `target_host`:`<IP>`, `data`:`<Data>`}: Copies `data` from the `source_host` to `target_host` IF both are controlled and `target_host` is accessible from `source_host`.
+
+### Action preconditions and effects
+In the following table, we describe the effects of selected actions and their preconditions. Note that if the preconditions are not satisfied, the action's effects are not applied.
+
+| Action | Params | Preconditions | Effects |
+|----------------------|----------------------|----------------------|----------------------|
+| ScanNetwork| `source_host`, `target_network`| `source_host` &isinv; `controlled_hosts`| extends `known_networks`|
+|FindServices| `source_host`, `target_host`| `source_host` &isinv; `controlled_hosts`| extends `known_services` AND `known_hosts`|
+|FindData| `source_host`, `target_host`| `source_host`, `target_host` ∈ `controlled_hosts`| extends `known_data`|
+|ExploitService | `source_host`, `target_host`, `target_service`|`source_host` &isinv; `controlled_hosts`| extends `controlled_hosts` with `target_host`|
+|ExfiltrateData| `source_host`,`target_host`, `data` |`source_host`, `target_host` ∈ `controlled_hosts` AND `data` ∈ `known_data`| extends `known_data[target_host]` with `data`|
+|BlockIP | `source_host`, `target_host`, `blockedIP`|`source_host` &isinv; `controlled_hosts`| extends `known_blocks[target_host]` with `blockedIP`|
+
+#### Assumption and Conditions for Actions
+1. When playing the `ExploitService` action, it is expected that the agent has discovered this service before (by playing `FindServices` in the `target_host` before this action)
+2. The `FindData` action finds all the available data in the host if successful.
+3. The `FindData` action requires ownership of the target host.
+4. Playing `ExfiltrateData` requires controlling **BOTH** source and target hosts
+5. Playing `FindServices` can be used to discover hosts (if those have any active services)
+6. Parameters of `ScanNetwork` and `FindServices` can be chosen arbitrarily (they don't have to be listed in `known_networks`/`known_hosts`)
+
+### Observations
+After submitting Action `a` to the environment, agents receive an `Observation` in return. Each observation consists of 4 parts:
+- `state`:`GameState` - with the current view of the environment [state](#gamestate)
+- `reward`: `int` - with the immediate reward agent gets for playing Action `a`
+- `end`:`bool` - indicating if the interaction can continue after playing Action `a`
+- `info`: `dict` - placeholder for any information given to the agent (e.g., the reason why `end is True` )
\ No newline at end of file
diff --git a/docs/configuration.md b/docs/configuration.md
new file mode 100644
index 00000000..2500fbf4
--- /dev/null
+++ b/docs/configuration.md
@@ -0,0 +1,157 @@
+The NetSecEnv is highly configurable in terms of the properties of the world, tasks, and agent interaction. Modification of the world is done in the YAML configuration file in two main areas:
+1. Environment (`env` section) controls the properties of the world (taxonomy of networks, maximum allowed steps per episode, probabilities of action success, etc.)
+2. Task configuration defines the agents' properties (starting position, goal, etc.)
+
+## Environment configuration
+The environment part defines the properties of the environment for the task (see the example below). In particular:
+
+- `random_seed` - sets seed for any random processes in the environment
+- `scenario` - sets the scenario (network topology) used in the task:
+    - `one_network` - several client computers and servers in single local network
+    -  `two_networks_tiny` - single client and server in separate local networks + remote C&C server
+    -  `two_networks_small` - single client and 5 servers in separate local networks + remote C&C server
+    -  `two_networks` - 5 clients and 5 servers in separate local networks + remote C&C server
+    -  `three_net_scenario` - 5 clients in a local network, 5 servers split in 2 additional local networks + remote C&C server
+- `save_trajectories` - if `True`, interaction of the agents is serialized and stored in a file
+- `use_dynamic_addresses` - if `True`, the network and IP addresses defined in `scenario` are randomly changed at the beginning of **EVERY** episode (the network topology is kept as defined in the `scenario`. Relations between networks are kept, IPs inside networks are chosen at random based on the network IP and mask)
+- `use_firewall` - if `True` firewall rules defined in `scenario` are used when executing actions. When `False`, the firewall is ignored, and all connections are allowed (Default)
+- `use_global_defender` - if `True`, enables the global defender, which is part of the environment and can stop the interaction of any playing agent.
+- `required_players` - Minimum required players for the game to start (default 1)
+- `rewards`:
+    - `success` - sets the reward the agent gets when it reaches the goal (default 100)
+    - `fail` - sets the reward the agent gets when it does not reach its objective (default -10)
+    - `step` - sets the reward the agent gets for every step taken (default -1)
+- `actions` - defines the probability of success for every ActionType
+
+```YAML
+env:
+    random_seed: 'random'
+    scenario: 'scenario1'
+    use_global_defender: False
+    use_dynamic_addresses: False
+    use_firewall: True
+    save_trajectories: False
+    rewards:
+        success: 100
+        step: -1
+        fail: -10
+    actions:
+        scan_network:
+            prob_success: 1.0
+        find_services:
+            prob_success: 1.0
+        exploit_service:
+            prob_success: 1.0
+        find_data:
+            prob_success: 1.0
+        exfiltrate_data:
+            prob_success: 1.0
+        block_ip:
+            prob_success: 1.0
+```
+### Definition of the network topology
+The network topology and rules are defined using a [CYST](https://pypi.org/project/cyst/) simulator configuration. CYST defines a complex network configuration, and this environment does not use all CYST features for now. CYST components currently used are:
+
+- Server hosts (are a NodeConf in CYST)
+    - Interfaces, each with one IP address
+    - Users that can log in to the host
+    - Active and passive services
+    - Data in the server
+    - To which network is connected
+- Client host (are a Node in CYST)
+    - Interfaces, each with one IP address
+    - To which network is connected
+    - Active and passive services if any
+    - Data in the client
+- Router (are a RouterConf in CYST)
+    - Interfaces, each with one IP address
+    - Networks
+    - Allowed connections between hosts
+- Internet host (as an external router) (are a Node in RouterConf)
+    - Interfaces, each with one IP address
+    - Which host can connect
+- Exploits
+    - which service is the exploit linked to
+    
+## Task configuration
+The task configuration part (section `coordinator[agents]`) defines the starting and goal position of the attacker and the type of defender that is used.
+
+### Attacker configuration
+Configuration of the attacking agents. Consists of three parts:
+1. Goal definition (`goal`) which describes the `GameState` properties that must be fulfilled to award `win` reward to the attacker:
+    - `known_networks:`(list)
+    - `known_hosts`(list)
+    - `controlled_hosts`(list)
+    - `known_services`(dict)
+    - `known_data`(dict)
+    - `known_blocks`(dict)
+
+     Each of the parts can be empty (not part of the goal), exactly defined (e.g., `known_networks: [192.168.1.0/24, 192.168.3.0/24]`), or include the keyword `random` (`controlled_hosts: [213.47.23.195, random]`, `known_data: {213.47.23.195: [random]}`).
+    Additionally, if the `random` keyword is used in the goal definition, the `randomize_goal_every_episode` option controls how often it is resolved.
+    If set to `True`, each keyword `random` is replaced with a randomly selected, valid option at the beginning of **EVERY** episode. If set to `False`, randomization is performed only **once** when the environment is initialized.
+2. Definition of starting position (`start_position`), which describes the `GameState` in which the attacker starts. It consists of:
+    - `known_networks:`(list)
+    - `known_hosts`(list)
+    - `controlled_hosts`(list)
+    - `known_services`(dict)
+    - `known_data`(dict)
+    - `known_blocks`(dict)
+
+    The initial network configuration must assign at least **one** controlled host to the attacker in the network. Any item in `controlled_hosts` is copied to `known_hosts`, so there is no need to include these in both sets. `known_networks` is also extended with a set of **all** networks accessible from the `controlled_hosts`
+3. Definition of maximum allowed amount of steps:
+    - `max_steps:`(int): defines the maximum allowed number of steps for attackers in **each** episode.
+
+Example attacker configuration:
+```YAML
+coordinator:
+    agents:
+        Attacker:
+            max_steps: 20
+            goal:
+                randomize_goal_every_episode: False
+                known_networks: []
+                known_hosts: []
+                controlled_hosts: []
+                known_services: {192.168.1.3: [Local system, lanman server, 10.0.19041, False], 192.168.1.4: [Other system, SMB server, 21.2.39421, False]}
+                known_data: {213.47.23.195: ["random"]}
+                known_blocks: {'all_routers': 'all_attackers'}
+
+            start_position:
+                known_networks: []
+                known_hosts: []
+                # The attacker must always at least control the CC if the goal is to exfiltrate there
+                # Example of fixing the starting point of the agent in a local host
+                controlled_hosts: [213.47.23.195, random]
+                # Services are defined as a target host where the service must be, and then a description in the form 'name, type, version, is_local'
+                known_services: {}
+                known_data: {}
+                known_blocks: {}
+```
+
+### Defender configuration
+Currently, the defender **is** a separate agent.
+
+If you want a defender in the game, you must connect a defender agent. For playing without a defender, leave the section empty.
+
+Example of defender configuration:
+```YAML
+   Defender:
+      goal:
+        description: "Block all attackers"
+        known_networks: []
+        known_hosts: []
+        controlled_hosts: []
+        known_services: {}
+        known_data: {}
+        known_blocks: {}
+
+      start_position:
+        known_networks: []
+        known_hosts: []
+        controlled_hosts: [all_local]
+        known_services: {}
+        known_data: {}
+        blocked_ips: {}
+        known_blocks: {}
+```
+As in other agents, the description is only a text for the agent, so it can know what it is supposed to do to win. In the current implementation, the *Defender* wins if **NO ATTACKER** reaches their goal.
\ No newline at end of file
diff --git a/docs/game_components.md b/docs/game_components.md
new file mode 100644
index 00000000..109af552
--- /dev/null
+++ b/docs/game_components.md
@@ -0,0 +1,2 @@
+# Game Components
+::: AIDojoCoordinator.game_components
\ No newline at end of file
diff --git a/docs/game_coordinator.md b/docs/game_coordinator.md
new file mode 100644
index 00000000..40b92a3b
--- /dev/null
+++ b/docs/game_coordinator.md
@@ -0,0 +1,17 @@
+# Game Coordinator
+Coordinator is the centerpiece of the game orchestration. It provides an interface between the agents and the worlds.
+
+In detail it handles:
+
+1. World initialization
+2. Registration of new agents in the game
+3. Agent-World communication (message verification and forwarding)
+4. Recording (and storing) trajectories of agents (optional)
+5. Detection of episode ends (either by reaching timeout or agents reaching their respective goals)
+6. Assigning rewards for each action and at the end of each episode
+7. Removing agents from the game
+8. Registering the GameReset requests and handling the game resets.
+
+To facilitate the communication, the coordinator uses a TCP server to which agents connect. The communication is asynchronous and depends on the classes documented below:
+::: AIDojoCoordinator.coordinator.AgentServer
+::: AIDojoCoordinator.coordinator.GameCoordinator
\ No newline at end of file
diff --git a/docs/index.md b/docs/index.md
new file mode 100644
index 00000000..f17fdd3b
--- /dev/null
+++ b/docs/index.md
@@ -0,0 +1,147 @@
+# Network Security Game
+
+The NetSecGame (Network Security Game) is a framework for training and evaluation of AI agents in network security tasks (both offensive and defensive). It is built with the [CYST](https://pypi.org/project/cyst/) network simulator and enables rapid development and testing of AI agents in highly configurable scenarios. Examples of implemented agents can be seen in the submodule [NetSecGameAgents](https://github.com/stratosphereips/NetSecGameAgents/tree/main).
+
+## Installation Guide
+It is recommended to install the NetSecGame in a virtual environment:
+### Python venv
+1. 
+```bash
+python -m venv <venv-name>
+```
+2. 
+```bash
+source <venv-name>/bin/activate
+```
+
+### Conda
+1. 
+```bash
+conda create --name aidojo python==3.12
+```
+2. 
+```bash
+conda activate aidojo
+```
+
+After the virtual environment is activated, install using pip:
+```bash
+pip install -e .
+```
+### With Docker
+The NetSecGame can be run in a Docker container. You can build the image locally with:
+```bash 
+docker build -t aidojo-nsg-coordinator:latest .
+```
+or use the available image from [Dockerhub](https://hub.docker.com/r/lukasond/aidojo-coordinator).
+```bash
+docker pull lukasond/aidojo-coordinator:1.0.2
+```
+## Quick Start
+A task configuration needs to be specified to start the NetSecGame (see [Configuration](configuration.md)). For the first step, the example task configuration is recommended:
+```yaml
+# Example of the task configuration for NetSecGame
+# The objective of the Attacker in this task is to locate specific data
+# and exfiltrate it to a remote C&C server.
+# The scenario starts AFTER initial breach of the local network
+# (the attacker controls 1 local device + the remote C&C server).
+
+coordinator:
+  agents:
+    Attacker: # Configuration of 'Attacker' agents
+      max_steps: 25
+      goal:
+        description: "Exfiltrate data from Samba server to remote C&C server."
+        is_any_part_of_goal_random: True
+        known_networks: []
+        known_hosts: []
+        controlled_hosts: []
+        known_services: {}
+        known_data: {213.47.23.195: [[User1,DataFromServer1]]} # winning condition
+        known_blocks: {}
+      start_position: # Defined starting position of the attacker
+        known_networks: []
+        known_hosts: []
+        controlled_hosts: [213.47.23.195, random] #
+        known_services: {}
+        known_data: {}
+        known_blocks: {}
+
+    Defender:
+      goal:
+        description: "Block all attackers"
+        is_any_part_of_goal_random: False
+        known_networks: []
+        known_hosts: []
+        controlled_hosts: []
+        known_services: {}
+        known_data: {}
+        known_blocks: {213.47.23.195: 'all_attackers'}
+
+      start_position:
+        known_networks: []
+        known_hosts: []
+        controlled_hosts: []
+        known_services: {}
+        known_data: {}
+        blocked_ips: {}
+        known_blocks: {}
+
+env:
+  scenario: 'two_networks_tiny' # use the smallest topology for this example
+  use_global_defender: False # Do not use global SIEM Defender
+  use_dynamic_addresses: False # Do not randomize IP addresses
+  use_firewall: True # Use firewall
+  save_trajectories: False # Do not store trajectories
+  required_players: 1
+  rewards: # Configurable reward function
+    success: 100
+    step: -1
+    fail: -10
+    false_positive: -5 
+```
+
+The game can be started with:
+```bash
+python3 -m AIDojoCoordinator.worlds.NSEGameCoordinator \
+  --task_config=./examples/example_task_configuration.yaml \
+  --game_port=9000
+```
+This creates the game server on `localhost:9000`, to which the agents can connect to interact in the NetSecGame.
+### Docker Container
+When running in the Docker container, the NetSecGame can be started with:
+```bash
+docker run -it --rm \
+  -v $(pwd)/examples/example_task_configuration.yaml:/aidojo/netsecenv_conf.yaml \
+  -v $(pwd)/logs:/aidojo/logs \
+  -p 9000:9000 lukasond/aidojo-coordinator:1.0.2
+```
+
+## Documentation
+The NetSecGame environment has several components in the following files:
+```
+├── AIDojoCoordinator/
+|   ├── coordinator.py
+|   ├── game_components.py
+|   ├── global_defender.py
+|   ├── worlds/
+|   |   ├── NSGCoordinator.py
+|   |   ├── NSGRealWorldCoordinator.py
+|   |   ├── CYSTCoordinator.py
+|   ├── scenarios/
+|   ├── utils/
+|   |   ├── utils.py
+|   |   ├── log_parser.py
+|   |   ├── gamaplay_graphs.py
+|   |   ├── actions_parser.py
+```
+Some components are described in detail in the following sections:
+
+- [Architecture](architecture.md) describes the architecture and important design decisions of the NetSecGame
+- [Configuration](configuration.md) describes the task and scenario configuration for NetSecGame
+- [API Reference](game_components.md) provides details of the API
+
+## About
+This code was developed at the [Stratosphere Laboratory at the Czech Technical University in Prague](https://www.stratosphereips.org/). The project is supported by Strategic Support for the Development of Security Research in the Czech Republic 2019–2025 (IMPAKT 1) program, by the Ministry of the Interior of the Czech Republic under No.
+VJ02010020 – AI-Dojo: Multi-agent testbed for the
+research and testing of AI-driven cyber security technologies.
\ No newline at end of file
diff --git a/examples/example_task_configuration.yaml b/examples/example_task_configuration.yaml
new file mode 100644
index 00000000..084054f6
--- /dev/null
+++ b/examples/example_task_configuration.yaml
@@ -0,0 +1,59 @@
+# Example of the task configuration for NetSecGame
+# The objective of the Attacker in this task is to locate specific data
+# and exfiltrate it to a remote C&C server.
+# The scenario starts AFTER initial breach of the local network
+# (the attacker controls 1 local device + the remote C&C server).
+
+coordinator:
+  agents:
+    Attacker: # Configuration of 'Attacker' agents
+      max_steps: 25
+      goal:
+        description: "Exfiltrate data from Samba server to remote C&C server."
+        is_any_part_of_goal_random: True
+        known_networks: []
+        known_hosts: []
+        controlled_hosts: []
+        known_services: {}
+        known_data: {213.47.23.195: [[User1,DataFromServer1]]} # winning condition
+        known_blocks: {}
+      start_position: # Defined starting position of the attacker
+        known_networks: []
+        known_hosts: []
+        controlled_hosts: [213.47.23.195, random] #
+        known_services: {}
+        known_data: {}
+        known_blocks: {}
+
+    Defender:
+      goal:
+        description: "Block all attackers"
+        is_any_part_of_goal_random: False
+        known_networks: []
+        known_hosts: []
+        controlled_hosts: []
+        known_services: {}
+        known_data: {}
+        known_blocks: {213.47.23.195: 'all_attackers'}
+
+      start_position:
+        known_networks: []
+        known_hosts: []
+        controlled_hosts: []
+        known_services: {}
+        known_data: {}
+        blocked_ips: {}
+        known_blocks: {}
+
+env:
+  scenario: 'two_networks_tiny' # use the smallest topology for this example
+  use_global_defender: False # Do not use global SIEM Defender
+  use_dynamic_addresses: False # Do not randomize IP addresses
+  use_firewall: True # Use firewall
+  save_trajectories: False # Do not store trajectories
+  required_players: 1
+  rewards: # Configurable reward function
+    success: 100
+    step: -1
+    fail: -10
+    false_positive: -5 
\ No newline at end of file
diff --git a/mkdocs.yml b/mkdocs.yml
new file mode 100644
index 00000000..e5fdccab
--- /dev/null
+++ b/mkdocs.yml
@@ -0,0 +1,31 @@
+site_name: Network Security Game
+theme:
+  name: material
+
+nav:
+  - Home: index.md
+  - Architecture: architecture.md
+  - Configuration: configuration.md
+  - API Reference: 
+    - game_components.md
+    - game_coordinator.md
+
+plugins:
+  - mkdocstrings:
+      default_handler: python
+      handlers:
+        python:
+          options:
+            heading_level: 2
+            show_root_heading: true
+            show_signature: true
+            show_source: true
+            members_order: alphabetical
+            merge_init_into_class: true
+
+markdown_extensions:
+  - pymdownx.arithmatex
+  - pymdownx.superfences
+
+extra_javascript:
+  - https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index b4810852..2001841b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -54,6 +54,13 @@ dev = [
     "pytest-asyncio"
 ]
 
+docs = [
+    "mkdocs",
+    "mkdocs-material",
+    "mkdocstrings[python]",
+    "pymdown-extensions"
+]
+
 [project.urls]
 Homepage = "https://github.com/stratosphereips/NetSecGame"
 Repository = "https://github.com/stratosphereips/NetSecGame"