class Pretraining:
+    """
+    Handles loading of offline experience data and pretraining of a reinforcement learning model.
+
+    Attributes:
+        file_names (List[str]): List of YAML files containing pre-recorded environment samples.
+        model (object): The model with `prepare_state` and `train` methods.
+        replay_buffer (object): The buffer used to store experiences for training.
+        reward_function (callable): Function to compute the reward from the environment state.
+    """
+
    def __init__(
        self,
        file_names: List[str],
@@ -22,6 +32,12 @@ def __init__(
        self.reward_function = reward_function

    def load_buffer(self):
+        """
+        Load samples from the specified files and populate the replay buffer.
+
+        Returns:
+            object: The populated replay buffer.
+        """
        for file_name in self.file_names:
            print("Loading file: ", file_name)
            with open(file_name, "r") as file:
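The docstrings above describe the offline-data path end to end; below is a minimal construction sketch. The `my_model`, `my_buffer`, and `my_sim` names and the constructor keyword names are assumptions inferred from the attribute list, not code from this repository.

# Sketch only: my_model, my_buffer, and my_sim are placeholders for whatever
# model, replay buffer, and simulator the project actually instantiates.
pretraining = Pretraining(
    file_names=["robot_nav/assets/data.yml"],
    model=my_model,                     # needs prepare_state(...) and train(...)
    replay_buffer=my_buffer,            # experience buffer to populate
    reward_function=my_sim.get_reward,  # computes reward from the environment state
)
replay_buffer = pretraining.load_buffer()  # parse the YAML samples into the buffer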
@@ -76,6 +92,15 @@ def train(
        iterations,
        batch_size,
    ):
+        """
+        Run pretraining on the model using the replay buffer.
+
+        Args:
+            pretraining_iterations (int): Number of outer loop iterations for pretraining.
+            replay_buffer (object): Buffer to sample training batches from.
+            iterations (int): Number of training steps per pretraining iteration.
+            batch_size (int): Batch size used during training.
+        """
        print("Running Pretraining")
        for _ in tqdm(range(pretraining_iterations)):
            self.model.train(
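With the arguments documented above, the pretraining loop can be invoked as in the sketch below; the numeric values are illustrative placeholders, not defaults from this code.

# Sketch only: iteration counts and batch size are illustrative placeholders.
pretraining.train(
    pretraining_iterations=10,    # outer passes over the loaded data
    replay_buffer=replay_buffer,  # buffer returned by load_buffer()
    iterations=100,               # training steps per outer iteration
    batch_size=64,
)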
@@ -99,6 +124,25 @@ def get_buffer(
    file_names=["robot_nav/assets/data.yml"],
    history_len=10,
):
+    """
+    Get or construct the replay buffer depending on model type and training configuration.
+
+    Args:
+        model (object): The RL model, can be PPO, RCPG, or other.
+        sim (object): Simulation environment with a `get_reward` function.
+        load_saved_buffer (bool): Whether to load experiences from file.
+        pretrain (bool): Whether to run pretraining using the buffer.
+        pretraining_iterations (int): Number of outer loop iterations for pretraining.
+        training_iterations (int): Number of iterations in each training loop.
+        batch_size (int): Size of the training batch.
+        buffer_size (int, optional): Maximum size of the buffer. Defaults to 50000.
+        random_seed (int, optional): Seed for reproducibility. Defaults to 666.
+        file_names (List[str], optional): List of YAML data file paths. Defaults to ["robot_nav/assets/data.yml"].
+        history_len (int, optional): Used for RCPG buffer configuration. Defaults to 10.
+
+    Returns:
+        object: The initialized and optionally pre-populated replay buffer.
+    """
    if isinstance(model, PPO):
        return model.buffer
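The helper documented above is the usual entry point for setting up a buffer; here is a hedged call sketch, with `model` and `sim` standing in for the model and simulator objects the training script already holds and the numeric values chosen only for illustration.

# Sketch only: model and sim are whatever RL model and simulation environment
# the training script provides; keyword names mirror the docstring above.
replay_buffer = get_buffer(
    model=model,
    sim=sim,
    load_saved_buffer=True,   # read pre-recorded YAML experiences
    pretrain=True,            # run the pretraining loop on that data
    pretraining_iterations=10,
    training_iterations=100,
    batch_size=64,
)

For a PPO model the function short-circuits and simply returns the model's own buffer, as the `isinstance` check above shows.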
@@ -147,6 +191,27 @@ def get_max_bound(
    done,
    device,
):
+    """
+    Estimate the maximum possible return (upper bound) from the next state onward.
+
+    This is used in constrained RL or safe policy optimization where a conservative
+    estimate of return is useful for policy updates.
+
+    Args:
+        next_state (torch.Tensor): Tensor of next state observations.
+        discount (float): Discount factor for future rewards.
+        max_ang_vel (float): Maximum angular velocity of the agent.
+        max_lin_vel (float): Maximum linear velocity of the agent.
+        time_step (float): Duration of one time step.
+        distance_norm (float): Normalization factor for distance.
+        goal_reward (float): Reward received upon reaching the goal.
+        reward (torch.Tensor): Immediate reward from the environment.
+        done (torch.Tensor): Binary tensor indicating episode termination.
+        device (torch.device): PyTorch device for computation.
+
+    Returns:
+        torch.Tensor: Maximum return bound for each sample in the batch.
+    """
    next_state = next_state.clone()  # Prevents in-place modifications
    reward = reward.clone()  # Ensures original reward is unchanged
    done = done.clone()
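To make the docstring concrete, here is one plausible way such an optimistic bound could be assembled from these inputs. It is a sketch of the idea only, not necessarily the exact formula this function implements; `dist_to_goal` (the goal distance taken from `next_state`) is an assumed intermediate quantity.

import torch

def sketch_max_bound(dist_to_goal, discount, max_lin_vel, time_step,
                     distance_norm, goal_reward, reward, done):
    # Best case: the robot drives straight at the goal at maximum linear velocity,
    # so the fewest steps needed is (un-normalized distance) / (max_lin_vel * time_step).
    steps_to_goal = torch.ceil(dist_to_goal * distance_norm / (max_lin_vel * time_step))
    # The goal reward, discounted over those steps, is the most future return a
    # non-terminal sample could still collect.
    best_future = (discount ** steps_to_goal) * goal_reward
    # Terminal samples keep only their immediate reward.
    return reward + (1 - done) * best_future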