Commit d508138

add comment to summary writer and add a test env
1 parent: 54f2b26

File tree

10 files changed: +197 −21 lines changed


robot_nav/models/BPG/BCNNPG.py

Lines changed: 1 addition & 1 deletion
@@ -131,7 +131,7 @@ def __init__(
         self.action_dim = action_dim
         self.max_action = max_action
         self.state_dim = state_dim
-        self.writer = SummaryWriter()
+        self.writer = SummaryWriter(comment=model_name)
         self.iter_count = 0
         if load_model:
             self.load(filename=model_name, directory=load_directory)
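
The same one-line change is applied in each model class below: a comment is passed to the TensorBoard writer. As a minimal sketch of the effect (standard torch.utils.tensorboard behavior; the model name here is illustrative), the comment is appended as a suffix to the auto-generated run directory, so logs from different models land in distinguishable folders:

from torch.utils.tensorboard import SummaryWriter

# Without a comment, logs go to runs/<date>_<host>/; with one, the suffix
# makes runs easy to tell apart, e.g. runs/May04_12-00-00_myhostCNNTD3/.
writer = SummaryWriter(comment="CNNTD3")  # "CNNTD3" is an illustrative name
writer.add_scalar("train/loss", 0.5, 1)  # tag, value, global step
writer.close()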

robot_nav/models/BPG/BCNNTD3.py

Lines changed: 1 addition & 1 deletion
@@ -147,7 +147,7 @@ def __init__(
         self.action_dim = action_dim
         self.max_action = max_action
         self.state_dim = state_dim
-        self.writer = SummaryWriter()
+        self.writer = SummaryWriter(comment=model_name)
         self.iter_count = 0
         if load_model:
             self.load(filename=model_name, directory=load_directory)

robot_nav/models/BPG/BTD3.py

Lines changed: 1 addition & 1 deletion
@@ -101,7 +101,7 @@ def __init__(
         self.action_dim = action_dim
         self.max_action = max_action
         self.state_dim = state_dim
-        self.writer = SummaryWriter()
+        self.writer = SummaryWriter(comment=model_name)
         self.iter_count = 0
         if load_model:
             self.load(filename=model_name, directory=load_directory)

robot_nav/models/CNNTD3/CNNTD3.py

Lines changed: 1 addition & 1 deletion
@@ -145,7 +145,7 @@ def __init__(
         self.action_dim = action_dim
         self.max_action = max_action
         self.state_dim = state_dim
-        self.writer = SummaryWriter()
+        self.writer = SummaryWriter(comment=model_name)
         self.iter_count = 0
         if load_model:
             self.load(filename=model_name, directory=load_directory)

robot_nav/models/PPO/PPO.py

Lines changed: 1 addition & 1 deletion
@@ -157,7 +157,7 @@ def __init__(
             self.load(filename=model_name, directory=load_directory)

         self.MseLoss = nn.MSELoss()
-        self.writer = SummaryWriter()
+        self.writer = SummaryWriter(comment=model_name)

     def set_action_std(self, new_action_std):
         self.action_std = new_action_std

robot_nav/models/RCPG/RCPG.py

Lines changed: 1 addition & 1 deletion
@@ -189,7 +189,7 @@ def __init__(
         self.action_dim = action_dim
         self.max_action = max_action
         self.state_dim = state_dim
-        self.writer = SummaryWriter()
+        self.writer = SummaryWriter(comment=model_name)
         self.iter_count = 0
         self.model_name = model_name + rnn
         if load_model:

robot_nav/models/SAC/SAC.py

Lines changed: 1 addition & 1 deletion
@@ -112,7 +112,7 @@ def __init__(
         self.actor.train(True)
         self.critic.train(True)
         self.step = 0
-        self.writer = SummaryWriter()
+        self.writer = SummaryWriter(comment=model_name)

     def save(self, filename, directory):
         Path(directory).mkdir(parents=True, exist_ok=True)

robot_nav/sim.py

Lines changed: 13 additions & 5 deletions
@@ -7,14 +7,14 @@


 class SIM_ENV:
-    def __init__(self, world_file="robot_world.yaml"):
-        self.env = irsim.make(world_file)
+    def __init__(self, world_file="robot_world.yaml", disable_plotting=False):
+        self.env = irsim.make(world_file, disable_all_plot=disable_plotting)
         robot_info = self.env.get_robot_info(0)
         self.robot_goal = robot_info.goal

     def step(self, lin_velocity=0.0, ang_velocity=0.1):
         self.env.step(action_id=0, action=np.array([[lin_velocity], [ang_velocity]]))
-        self.env.render()
+        self.env.render(interval=0.01)

         scan = self.env.get_lidar_scan()
         latest_scan = scan["ranges"]
@@ -34,7 +34,13 @@ def step(self, lin_velocity=0.0, ang_velocity=0.1):

         return latest_scan, distance, cos, sin, collision, goal, action, reward

-    def reset(self, robot_state=None, robot_goal=None, random_obstacles=True):
+    def reset(
+        self,
+        robot_state=None,
+        robot_goal=None,
+        random_obstacles=True,
+        random_obstacle_ids=None,
+    ):
         if robot_state is None:
             robot_state = [[random.uniform(1, 9)], [random.uniform(1, 9)], [0], [0]]

@@ -44,10 +50,12 @@ def reset(self, robot_state=None, robot_goal=None, random_obstacles=True):
         )

         if random_obstacles:
+            if random_obstacle_ids is None:
+                random_obstacle_ids = [i + 1 for i in range(7)]
             self.env.random_obstacle_position(
                 range_low=[0, 0, -3.14],
                 range_high=[10, 10, 3.14],
-                ids=[i + 1 for i in range(7)],
+                ids=random_obstacle_ids,
                 non_overlapping=True,
             )
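
Taken together, the new SIM_ENV keyword arguments allow headless runs and control over which obstacles get shuffled on reset. A short usage sketch, assuming the package layout above (the world file name is taken from test_random.py below):

from robot_nav.sim import SIM_ENV

# Headless environment: irsim plotting is disabled entirely.
sim = SIM_ENV(world_file="eval_world.yaml", disable_plotting=True)

# Randomize only obstacles 1-3; passing None (the default) falls back
# to the previous hardcoded behavior of randomizing ids 1-7.
obs = sim.reset(
    robot_state=None,
    robot_goal=None,
    random_obstacles=True,
    random_obstacle_ids=[1, 2, 3],
)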

robot_nav/test_random.py

Lines changed: 167 additions & 0 deletions
@@ -0,0 +1,167 @@
+from robot_nav.models.TD3.TD3 import TD3
+from robot_nav.models.DDPG.DDPG import DDPG
+from robot_nav.models.SAC.SAC import SAC
+from robot_nav.models.HCM.hardcoded_model import HCM
+from robot_nav.models.PPO.PPO import PPO
+from robot_nav.models.BPG.BPG import BPG
+from robot_nav.models.BPG.BTD3 import BTD3
+from robot_nav.models.CNNTD3.CNNTD3 import CNNTD3
+import statistics
+import numpy as np
+import tqdm
+import matplotlib.pyplot as plt
+
+import torch
+from sim import SIM_ENV
+
+
+def main(args=None):
+    """Main testing function"""
+    action_dim = 2  # number of actions produced by the model
+    max_action = 1  # maximum absolute value of output actions
+    state_dim = 25  # number of input values in the neural network (vector length of state input)
+    device = torch.device(
+        "cuda" if torch.cuda.is_available() else "cpu"
+    )  # using cuda if it is available, cpu otherwise
+    epoch = 0  # epoch number
+    max_steps = 300  # maximum number of steps in single episode
+    test_scenarios = 1000
+
+    model = DDPG(
+        state_dim=state_dim,
+        action_dim=action_dim,
+        max_action=max_action,
+        device=device,
+        load_model=True,
+        model_name="DDPGexp5",
+    )  # instantiate a model
+
+    sim = SIM_ENV(
+        world_file="eval_world.yaml", disable_plotting=True
+    )  # instantiate environment
+
+    print("..............................................")
+    print(f"Testing {test_scenarios} scenarios")
+    total_reward = []
+    reward_per_ep = []
+    lin_actions = []
+    ang_actions = []
+    total_steps = 0
+    col = 0
+    goals = 0
+    inter_rew = []
+    steps_to_goal = []
+    for _ in tqdm.tqdm(range(test_scenarios)):
+        count = 0
+        ep_reward = 0
+        latest_scan, distance, cos, sin, collision, goal, a, reward = sim.reset(
+            robot_state=None,
+            robot_goal=None,
+            random_obstacles=True,
+            random_obstacle_ids=[i + 1 for i in range(6)],
+        )
+        done = False
+        while not done and count < max_steps:
+            state, terminal = model.prepare_state(
+                latest_scan, distance, cos, sin, collision, goal, a
+            )
+            action = model.get_action(np.array(state), False)
+            a_in = [(action[0] + 1) / 4, action[1]]
+            lin_actions.append(a_in[0])
+            ang_actions.append(a_in[1])
+            latest_scan, distance, cos, sin, collision, goal, a, reward = sim.step(
+                lin_velocity=a_in[0], ang_velocity=a_in[1]
+            )
+            ep_reward += reward
+            total_reward.append(reward)
+            total_steps += 1
+            count += 1
+            if collision:
+                col += 1
+            if goal:
+                goals += 1
+                steps_to_goal.append(count)
+            done = collision or goal
+            if done:
+                reward_per_ep.append(ep_reward)
+            if not done:
+                inter_rew.append(reward)
+
+    total_reward = np.array(total_reward)
+    reward_per_ep = np.array(reward_per_ep)
+    inter_rew = np.array(inter_rew)
+    steps_to_goal = np.array(steps_to_goal)
+    lin_actions = np.array(lin_actions)
+    ang_actions = np.array(ang_actions)
+    avg_step_reward = statistics.mean(total_reward)
+    avg_step_reward_std = statistics.stdev(total_reward)
+    avg_ep_reward = statistics.mean(reward_per_ep)
+    avg_ep_reward_std = statistics.stdev(reward_per_ep)
+    avg_col = col / test_scenarios
+    avg_goal = goals / test_scenarios
+    avg_inter_step_rew = statistics.mean(inter_rew)
+    avg_inter_step_rew_std = statistics.stdev(inter_rew)
+    avg_steps_to_goal = statistics.mean(steps_to_goal)
+    avg_steps_to_goal_std = statistics.stdev(steps_to_goal)
+    mean_lin_action = statistics.mean(lin_actions)
+    lin_actions_std = statistics.stdev(lin_actions)
+    mean_ang_action = statistics.mean(ang_actions)
+    ang_actions_std = statistics.stdev(ang_actions)
+    print(f"avg_step_reward {avg_step_reward}")
+    print(f"avg_step_reward_std: {avg_step_reward_std}")
+    print(f"avg_ep_reward: {avg_ep_reward}")
+    print(f"avg_ep_reward_std: {avg_ep_reward_std}")
+    print(f"avg_col: {avg_col}")
+    print(f"avg_goal: {avg_goal}")
+    print(f"avg_inter_step_rew: {avg_inter_step_rew}")
+    print(f"avg_inter_step_rew_std: {avg_inter_step_rew_std}")
+    print(f"avg_steps_to_goal: {avg_steps_to_goal}")
+    print(f"avg_steps_to_goal_std: {avg_steps_to_goal_std}")
+    print(f"mean_lin_action: {mean_lin_action}")
+    print(f"lin_actions_std: {lin_actions_std}")
+    print(f"mean_ang_action: {mean_ang_action}")
+    print(f"ang_actions_std: {ang_actions_std}")
+    print("..............................................")
+    model.writer.add_scalar("test/avg_step_reward", avg_step_reward, epoch)
+    model.writer.add_scalar("test/avg_step_reward_std", avg_step_reward_std, epoch)
+    model.writer.add_scalar("test/avg_ep_reward", avg_ep_reward, epoch)
+    model.writer.add_scalar("test/avg_ep_reward_std", avg_ep_reward_std, epoch)
+    model.writer.add_scalar("test/avg_col", avg_col, epoch)
+    model.writer.add_scalar("test/avg_goal", avg_goal, epoch)
+    model.writer.add_scalar("test/avg_inter_step_rew", avg_inter_step_rew, epoch)
+    model.writer.add_scalar(
+        "test/avg_inter_step_rew_std", avg_inter_step_rew_std, epoch
+    )
+    model.writer.add_scalar("test/avg_steps_to_goal", avg_steps_to_goal, epoch)
+    model.writer.add_scalar("test/avg_steps_to_goal_std", avg_steps_to_goal_std, epoch)
+    model.writer.add_scalar("test/mean_lin_action", mean_lin_action, epoch)
+    model.writer.add_scalar("test/lin_actions_std", lin_actions_std, epoch)
+    model.writer.add_scalar("test/mean_ang_action", mean_ang_action, epoch)
+    model.writer.add_scalar("test/ang_actions_std", ang_actions_std, epoch)
+    bins = 100
+    model.writer.add_histogram("test/lin_actions", lin_actions, epoch, max_bins=bins)
+    model.writer.add_histogram("test/ang_actions", ang_actions, epoch, max_bins=bins)
+
+    counts, bin_edges = np.histogram(lin_actions, bins=bins)
+    fig, ax = plt.subplots()
+    ax.bar(
+        bin_edges[:-1], counts, width=np.diff(bin_edges), align="edge", log=True
+    )  # Log scale on y-axis
+    ax.set_xlabel("Value")
+    ax.set_ylabel("Frequency (Log Scale)")
+    ax.set_title("Histogram with Log Scale")
+    model.writer.add_figure("test/lin_actions_hist", fig)
+
+    counts, bin_edges = np.histogram(ang_actions, bins=bins)
+    fig, ax = plt.subplots()
+    ax.bar(
+        bin_edges[:-1], counts, width=np.diff(bin_edges), align="edge", log=True
+    )  # Log scale on y-axis
+    ax.set_xlabel("Value")
+    ax.set_ylabel("Frequency (Log Scale)")
+    ax.set_title("Histogram with Log Scale")
+    model.writer.add_figure("test/ang_actions_hist", fig)
+
+
+if __name__ == "__main__":
+    main()
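
One detail worth noting in the loop above is the action rescaling a_in = [(action[0] + 1) / 4, action[1]]: the actor outputs actions in [-1, 1] (max_action = 1), and the affine map (a + 1) / 4 squeezes the linear velocity into [0, 0.5] while the angular velocity passes through unchanged. A worked check on the endpoints:

# Endpoints and midpoint of the actor's output range -> linear velocity.
for a in (-1.0, 0.0, 1.0):
    print(f"{a:+.1f} -> {(a + 1) / 4:.2f}")  # -1.0 -> 0.00, +0.0 -> 0.25, +1.0 -> 0.50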

robot_nav/train.py

Lines changed: 10 additions & 9 deletions
@@ -1,10 +1,11 @@
-from models.TD3.TD3 import TD3
-from models.DDPG.DDPG import DDPG
+from robot_nav.models.TD3.TD3 import TD3
+from robot_nav.models.DDPG.DDPG import DDPG
 from robot_nav.models.BPG.BTD3 import BTD3
 from robot_nav.models.BPG.BPG import BPG
-from models.SAC.SAC import SAC
-from models.HCM.hardcoded_model import HCM
-from models.PPO.PPO import PPO
+from robot_nav.models.BPG.BCNNPG import BCNNPG
+from robot_nav.models.SAC.SAC import SAC
+from robot_nav.models.HCM.hardcoded_model import HCM
+from robot_nav.models.PPO.PPO import PPO
 from robot_nav.models.CNNTD3.CNNTD3 import CNNTD3

 import torch
@@ -38,18 +39,18 @@ def main(args=None):
     )
     save_every = 5  # save the model every n training cycles

-    model = BPG(
+    model = BCNNPG(
         state_dim=state_dim,
         action_dim=action_dim,
         max_action=max_action,
         device=device,
         save_every=save_every,
         load_model=False,
-        model_name="BPGw4exp1",
-        bound_weight=4,
+        model_name="BCNNPGw025exp1",
+        bound_weight=0.25,
     )  # instantiate a model

-    sim = SIM_ENV()  # instantiate environment
+    sim = SIM_ENV(disable_plotting=True)  # instantiate environment
     replay_buffer = get_buffer(
         model,
         sim,
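
With the relative model imports replaced by absolute robot_nav.* paths, the training script becomes importable from outside its own directory. The commit does not show how it is launched; a hedged sketch, assuming the repository root is on the Python path (for example via pip install -e .):

# Hypothetical entry point; equivalent to running
# `python -m robot_nav.train` from the repository root.
from robot_nav import train

train.main()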
