
Commit 741f502

update documentation
1 parent: 52bf738

4 files changed (+14, -14 lines)


robot_nav/models/CNNTD3/CNNTD3.py

Lines changed: 2 additions & 2 deletions
@@ -55,7 +55,7 @@ def forward(self, s):
             The last 5 elements are [distance, cos, sin, lin_vel, ang_vel].
 
         Returns:
-            torch.Tensor: Action tensor of shape (batch_size, action_dim),
+            (torch.Tensor): Action tensor of shape (batch_size, action_dim),
             with values in range [-1, 1] due to tanh activation.
         """
         if len(s.shape) == 1:
@@ -138,7 +138,7 @@ def forward(self, s, action):
             action (torch.Tensor): Current action tensor of shape (batch_size, action_dim).
 
         Returns:
-            tuple:
+            (tuple):
                 - q1 (torch.Tensor): First Q-value estimate (batch_size, 1).
                 - q2 (torch.Tensor): Second Q-value estimate (batch_size, 1).
         """

robot_nav/replay_buffer.py

Lines changed: 5 additions & 5 deletions
@@ -50,7 +50,7 @@ def size(self):
         Get the number of elements currently in the buffer.
 
         Returns:
-            int: Current buffer size.
+            (int): Current buffer size.
         """
         return self.count
 
@@ -62,7 +62,7 @@ def sample_batch(self, batch_size):
             batch_size (int): Number of experiences to sample.
 
         Returns:
-            Tuple of np.ndarrays: Batches of states, actions, rewards, done flags, and next states.
+            (Tuple of np.ndarrays): Batches of states, actions, rewards, done flags, and next states.
         """
         if self.count < batch_size:
             batch = random.sample(self.buffer, self.count)
@@ -82,7 +82,7 @@ def return_buffer(self):
         Return the entire buffer contents as separate arrays.
 
         Returns:
-            Tuple of np.ndarrays: Full arrays of states, actions, rewards, done flags, and next states.
+            (Tuple of np.ndarrays): Full arrays of states, actions, rewards, done flags, and next states.
         """
         s = np.array([_[0] for _ in self.buffer])
         a = np.array([_[1] for _ in self.buffer])
@@ -149,7 +149,7 @@ def size(self):
         Get the number of complete episodes in the buffer.
 
         Returns:
-            int: Number of episodes.
+            (int): Number of episodes.
         """
         return self.count
 
@@ -163,7 +163,7 @@ def sample_batch(self, batch_size):
             batch_size (int): Number of sequences to sample.
 
         Returns:
-            Tuple of np.ndarrays: Sequences of past states, actions, rewards, done flags, and next states.
+            (Tuple of np.ndarrays): Sequences of past states, actions, rewards, done flags, and next states.
         """
         if self.count < batch_size:
             batch = random.sample(
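
As a usage sketch, the documented sample_batch contract can be exercised with a tiny stand-in buffer. It assumes experiences are stored as (state, action, reward, done, next_state) tuples; only state at index 0 and action at index 1 are confirmed by the diff context, so the remaining field order is an assumption, and the class below is not the repository's ReplayBuffer.

import random
import numpy as np


class TinyReplayBuffer:
    """Illustrative stand-in, not the repository's replay buffer."""

    def __init__(self):
        self.buffer = []
        self.count = 0

    def add(self, state, action, reward, done, next_state):
        self.buffer.append((state, action, reward, done, next_state))
        self.count += 1

    def size(self):
        # (int): Current buffer size.
        return self.count

    def sample_batch(self, batch_size):
        # (Tuple of np.ndarrays): falls back to the full buffer when fewer
        # elements are stored than requested, mirroring the diff context.
        batch = random.sample(self.buffer, min(self.count, batch_size))
        return tuple(np.array(field) for field in zip(*batch))


buf = TinyReplayBuffer()
for _ in range(8):
    buf.add(np.random.rand(4), np.random.rand(2), 0.0, False, np.random.rand(4))
states, actions, rewards, dones, next_states = buf.sample_batch(4)
print(states.shape, actions.shape)  # (4, 4) (4, 2)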

robot_nav/sim.py

Lines changed: 4 additions & 4 deletions
@@ -39,7 +39,7 @@ def step(self, lin_velocity=0.0, ang_velocity=0.1):
             ang_velocity (float): Angular velocity to apply to the robot.
 
         Returns:
-            tuple: Contains the latest LIDAR scan, distance to goal, cosine and sine of angle to goal,
+            (tuple): Contains the latest LIDAR scan, distance to goal, cosine and sine of angle to goal,
                 collision flag, goal reached flag, applied action, and computed reward.
         """
         self.env.step(action_id=0, action=np.array([[lin_velocity], [ang_velocity]]))
@@ -80,7 +80,7 @@ def reset(
             random_obstacle_ids (list or None): Specific obstacle IDs to randomize.
 
         Returns:
-            tuple: Initial observation after reset, including LIDAR scan, distance, cos/sin,
+            (tuple): Initial observation after reset, including LIDAR scan, distance, cos/sin,
                 and reward-related flags and values.
         """
         if robot_state is None:
@@ -128,7 +128,7 @@ def cossin(vec1, vec2):
             vec2 (list): Second 2D vector.
 
         Returns:
-            tuple: (cosine, sine) of the angle between the vectors.
+            (tuple): (cosine, sine) of the angle between the vectors.
         """
         vec1 = vec1 / np.linalg.norm(vec1)
         vec2 = vec2 / np.linalg.norm(vec2)
@@ -148,7 +148,7 @@ def get_reward(goal, collision, action, laser_scan):
             laser_scan (list): The LIDAR scan readings.
 
         Returns:
-            float: Computed reward for the current state.
+            (float): Computed reward for the current state.
        """
         if goal:
             return 100.0
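
The cossin docstring and the normalization visible in the diff suggest a standard construction: cosine from the dot product of the unit vectors and sine from their 2D cross product. A hedged sketch under that assumption follows; the repository's sine sign convention is not visible in this diff, so the sign choice below is only one common option.

import numpy as np


def cossin(vec1, vec2):
    # Normalize both vectors, as in the diff context.
    vec1 = np.asarray(vec1, dtype=float)
    vec2 = np.asarray(vec2, dtype=float)
    vec1 = vec1 / np.linalg.norm(vec1)
    vec2 = vec2 / np.linalg.norm(vec2)
    cosine = float(np.dot(vec1, vec2))
    # 2D "cross product" scalar; the sign convention is an assumption.
    sine = float(vec1[0] * vec2[1] - vec1[1] * vec2[0])
    return cosine, sine


print(cossin([1.0, 0.0], [0.0, 1.0]))  # (0.0, 1.0) for a 90-degree angle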

robot_nav/utils.py

Lines changed: 3 additions & 3 deletions
@@ -36,7 +36,7 @@ def load_buffer(self):
         Load samples from the specified files and populate the replay buffer.
 
         Returns:
-            object: The populated replay buffer.
+            (object): The populated replay buffer.
         """
         for file_name in self.file_names:
             print("Loading file: ", file_name)
@@ -141,7 +141,7 @@ def get_buffer(
        history_len (int, optional): Used for RCPG buffer configuration. Defaults to 10.
 
    Returns:
-        object: The initialized and optionally pre-populated replay buffer.
+        (object): The initialized and optionally pre-populated replay buffer.
    """
    if isinstance(model, PPO):
        return model.buffer
@@ -210,7 +210,7 @@ def get_max_bound(
        device (torch.device): PyTorch device for computation.
 
    Returns:
-        torch.Tensor: Maximum return bound for each sample in the batch.
+        (torch.Tensor): Maximum return bound for each sample in the batch.
    """
    next_state = next_state.clone()  # Prevents in-place modifications
    reward = reward.clone()  # Ensures original reward is unchanged
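
All 14 changed lines in this commit apply the same convention: the return type in a Google-style Returns: section is wrapped in parentheses before the colon. A small illustration of the resulting style is below; the buffer_size function is hypothetical, not from the repository, and the commit does not state which documentation tool motivates the parentheses.

def buffer_size(buffer):
    """Get the number of elements currently in a buffer.

    Args:
        buffer (list): Sequence of stored experiences.

    Returns:
        (int): Current buffer size.
    """
    return len(buffer)


print(buffer_size([1, 2, 3]))  # 3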
