| TD3 | Twin Delayed Deep Deterministic Policy Gradient model | https://github.com/reiniscimurs/DRL-Robot-Navigation-ROS2 |
| SAC | Soft Actor-Critic model | https://github.com/denisyarats/pytorch_sac |
| PPO | Proximal Policy Optimization model | https://github.com/nikhilbarhate99/PPO-PyTorch |
| DDPG | Deep Deterministic Policy Gradient model | Updated from TD3 |
| CNNTD3 | TD3 model with 1D CNN encoding of the laser state | - |
| RCPG | Recurrent Convolution Policy Gradient - CNNTD3 model extended with recurrence layers (LSTM/GRU/RNN); see the sketch below | - |

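As a rough illustration of the CNNTD3 and RCPG encoder idea described in the table above, here is a minimal PyTorch sketch of a 1D CNN laser encoder with an optional recurrence layer. The class name, layer sizes, and exact wiring are assumptions for this example, not the repository's actual implementation.

```python
import torch
import torch.nn as nn

class LaserEncoder(nn.Module):
    """Hypothetical sketch: a 1D CNN over the laser scan, optionally
    followed by a recurrence layer as in the RCPG variants."""

    def __init__(self, n_beams=180, hidden=64, recurrence=None):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv1d(1, 16, kernel_size=5, stride=2), nn.ReLU(),
            nn.Conv1d(16, 32, kernel_size=3, stride=2), nn.ReLU(),
            nn.Flatten(),
        )
        # Infer the flattened CNN output size with a dummy forward pass
        conv_out = self.conv(torch.zeros(1, 1, n_beams)).shape[-1]
        self.proj = nn.Linear(conv_out, hidden)
        # recurrence: None, "lstm", "gru", or "rnn"
        rnn_cls = {"lstm": nn.LSTM, "gru": nn.GRU, "rnn": nn.RNN}.get(recurrence)
        self.rnn = rnn_cls(hidden, hidden, batch_first=True) if rnn_cls else None

    def forward(self, scans):
        # scans: (batch, seq_len, n_beams) history of laser readings
        b, t, n = scans.shape
        x = self.conv(scans.reshape(b * t, 1, n))
        x = self.proj(x).reshape(b, t, -1)
        if self.rnn is not None:
            x, _ = self.rnn(x)
        return x[:, -1]  # feature vector for the most recent step

# Example: a GRU variant over a batch of 4 sequences of 8 scans
encoder = LaserEncoder(recurrence="gru")
features = encoder(torch.randn(4, 8, 180))
print(features.shape)  # torch.Size([4, 64])
```
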
**Max Upper Bound Models**

Models that support an additional loss term based on Q-values exceeding the maximal possible Q-value in the episode. Q-values that exceed this upper bound incur an extra loss for the model, which helps control the overestimation of Q-values in off-policy actor-critic networks.

To enable the max upper bound loss, set `use_max_bound = True` when initializing a model.
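For illustration, here is a minimal sketch of how such a penalty could be computed. The function name, the geometric-sum form of the bound, and the squared penalty are assumptions for this example, not necessarily the repository's exact implementation.

```python
import torch

def max_upper_bound_loss(q_values, max_step_reward, discount, steps_remaining):
    # Assumed bound: with at most `steps_remaining` steps left and a maximum
    # per-step reward r_max, the discounted return cannot exceed the
    # geometric sum Q_max = r_max * (1 - gamma^n) / (1 - gamma).
    q_max = max_step_reward * (1 - discount ** steps_remaining) / (1 - discount)

    # Only the portion of each Q estimate above the bound is penalized;
    # estimates at or below the bound contribute zero loss.
    excess = torch.clamp(q_values - q_max, min=0.0)
    return (excess ** 2).mean()

# Example: three critic estimates, only the second exceeding its bound
q_values = torch.tensor([45.0, 120.0, 40.0])
steps_remaining = torch.tensor([100.0, 80.0, 60.0])
loss = max_upper_bound_loss(q_values, max_step_reward=1.0,
                            discount=0.99, steps_remaining=steps_remaining)
print(loss)
```

In a TD3-style update, a penalty of this kind would typically be added to the regular critic loss with a weighting coefficient.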