@@ -47,11 +47,11 @@ def add(self, state, action, reward, terminal, next_state):
         Add a transition to the buffer. (Partial implementation.)

         Args:
-            state: The current observed state.
-            action: The action taken.
-            reward: The reward received after taking the action.
+            state (list or np.array): The current observed state.
+            action (list or np.array): The action taken.
+            reward (float): The reward received after taking the action.
             terminal (bool): Whether the episode terminated.
-            next_state: The resulting state after taking the action.
+            next_state (list or np.array): The resulting state after taking the action.
         """
         self.states.append(state)
         self.rewards.append(reward)
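The hunk above documents the buffer's add() method. Below is a minimal, self-contained sketch of such a buffer, assuming list-backed storage as suggested by the visible self.states/self.rewards appends; the class name and the extra actions/terminals/next_states lists are assumptions, not the repository's code.

import numpy as np


class SketchBuffer:
    """Stand-in buffer exposing the documented add() signature."""

    def __init__(self):
        self.states, self.actions, self.rewards = [], [], []
        self.terminals, self.next_states = [], []

    def add(self, state, action, reward, terminal, next_state):
        # Append one transition; plain lists keep insertion order for later batching.
        self.states.append(state)
        self.actions.append(action)
        self.rewards.append(reward)
        self.terminals.append(terminal)
        self.next_states.append(next_state)


buf = SketchBuffer()
buf.add(np.zeros(4, dtype=np.float32), [0.5, -0.1], 1.0, False, np.zeros(4, dtype=np.float32))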
@@ -137,7 +137,7 @@ def act(self, state, sample):
            sample (bool): Whether to sample from the action distribution or use mean.

         Returns:
-            Tuple[Tensor, Tensor, Tensor]: Sampled (or mean) action, log probability, and state value.
+            (Tuple[Tensor, Tensor, Tensor]): Sampled (or mean) action, log probability, and state value.
         """
         action_mean = self.actor(state)
         cov_mat = torch.diag(self.action_var).unsqueeze(dim=0)
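Since act() is documented to return an action, its log probability, and a state value, and the visible code builds a diagonal covariance matrix, the sampling step presumably uses a multivariate Gaussian. A hedged, self-contained sketch under that assumption; the tensors below are placeholders for self.actor(state) and self.action_var.

import torch
from torch.distributions import MultivariateNormal

action_mean = torch.zeros(1, 2)                    # placeholder for self.actor(state)
action_var = torch.full((2,), 0.36)                # placeholder for self.action_var
cov_mat = torch.diag(action_var).unsqueeze(dim=0)  # (1, 2, 2) diagonal covariance

dist = MultivariateNormal(action_mean, cov_mat)
action = dist.sample()                 # with sample=False the mean would be used instead
action_logprob = dist.log_prob(action)
# The third returned tensor, the state value, would come from a critic network.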
@@ -163,7 +163,7 @@ def evaluate(self, state, action):
            action (Tensor): Batch of actions.

         Returns:
-            Tuple[Tensor, Tensor, Tensor]: Action log probabilities, state values, and distribution entropy.
+            (Tuple[Tensor, Tensor, Tensor]): Action log probabilities, state values, and distribution entropy.
         """
         action_mean = self.actor(state)
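evaluate() is documented to return per-sample log probabilities, state values, and distribution entropy. A hedged sketch of how those quantities are typically computed for a batch under the same multivariate-Gaussian assumption; all tensors here are placeholders, not the repository's data.

import torch
from torch.distributions import MultivariateNormal

batch_action_mean = torch.zeros(8, 2)              # placeholder for self.actor(state)
action_var = torch.full((2,), 0.36).expand_as(batch_action_mean)
cov_mat = torch.diag_embed(action_var)             # one (2, 2) covariance per sample
dist = MultivariateNormal(batch_action_mean, cov_mat)

actions = torch.randn(8, 2)                        # placeholder batch of actions
logprobs = dist.log_prob(actions)                  # shape (8,)
entropy = dist.entropy()                           # shape (8,)
# State values for the batch would again come from a critic network.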
@@ -306,7 +306,7 @@ def get_action(self, state, add_noise):
            add_noise (bool): Whether to sample from the distribution (True) or use the deterministic mean (False).

         Returns:
-            np.ndarray: Sampled action.
+            (np.ndarray): Sampled action.
         """

         with torch.no_grad():
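A hedged sketch of the gradient-free inference pattern get_action() appears to use. Only torch.no_grad() and the np.ndarray return type come from the diff; the network, its dimensions, and the noise model are assumptions for illustration.

import numpy as np
import torch

actor = torch.nn.Linear(4, 2)   # stand-in for the real policy network


def get_action_sketch(state, add_noise):
    state_t = torch.as_tensor(state, dtype=torch.float32).unsqueeze(0)
    with torch.no_grad():                      # inference only, no gradients tracked
        mean = actor(state_t)
        if add_noise:
            mean = mean + 0.1 * torch.randn_like(mean)   # assumed noise scale
    return mean.squeeze(0).numpy()             # np.ndarray, as the docstring states


a = get_action_sketch(np.zeros(4, dtype=np.float32), add_noise=True)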
@@ -326,7 +326,7 @@ def train(self, replay_buffer, iterations, batch_size):
         Train the policy and value function using PPO loss based on the stored rollout buffer.

         Args:
-            replay_buffer: Placeholder for compatibility (not used).
+            replay_buffer (object): Placeholder for compatibility (not used).
             iterations (int): Number of epochs to optimize the policy per update.
             batch_size (int): Batch size (not used; training uses the whole buffer).
         """
@@ -434,7 +434,7 @@ def prepare_state(self, latest_scan, distance, cos, sin, collision, goal, action
            action (tuple[float, float]): Last action taken (linear and angular velocities).

         Returns:
-            tuple[list[float], int]: Processed state vector and terminal flag (1 if terminal, else 0).
+            (tuple[list[float], int]): Processed state vector and terminal flag (1 if terminal, else 0).
         """
         latest_scan = np.array(latest_scan)
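A hedged sketch of the contract prepare_state() documents: build a flat state vector from the laser scan plus pose, goal, and last-action terms, and flag the step terminal. Only the signature, the np.array conversion, and the (state, flag) return come from the diff; the normalization and the collision-or-goal terminal rule are assumptions.

import numpy as np


def prepare_state_sketch(latest_scan, distance, cos, sin, collision, goal, action):
    latest_scan = np.array(latest_scan)
    scan = (np.clip(latest_scan, 0.0, 10.0) / 10.0).tolist()   # assumed range normalization
    state = scan + [distance, cos, sin] + list(action)
    terminal = 1 if (collision or goal) else 0                  # 1 if terminal, else 0
    return state, terminal


state, term = prepare_state_sketch([4.0] * 20, 2.5, 1.0, 0.0, False, False, (0.3, 0.0))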