4 files changed: +12 −4 lines changed

BCNNPG model:

@@ -112,8 +112,10 @@ def __init__(
         save_directory=Path("robot_nav/models/BPG/checkpoint"),
         model_name="BCNNPG",
         load_directory=Path("robot_nav/models/BPG/checkpoint"),
+        bound_weight=8
     ):
         # Initialize the Actor network
+        self.bound_weight = bound_weight
         self.device = device
         self.actor = Actor(action_dim).to(self.device)
         self.actor_target = Actor(action_dim).to(self.device)
@@ -223,7 +225,7 @@ def train(

         # Calculate the loss between the current Q value and the target Q value
         loss_target_Q = F.mse_loss(current_Q, target_Q)
-        max_bound_loss = 10 * max_bound_loss_Q
+        max_bound_loss = self.bound_weight * max_bound_loss_Q
         loss = loss_target_Q + max_bound_loss
         # Perform the gradient descent
         self.critic_optimizer.zero_grad()
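For context, a minimal runnable sketch of the single-critic update these hunks touch. Only the loss_target_Q arithmetic and the bound_weight multiplier come from the diff; the q_max bound and the way max_bound_loss_Q is computed are illustrative assumptions, since the diff does not show them:

```python
import torch
import torch.nn.functional as F

def bounded_critic_loss(current_Q, target_Q, bound_weight=8.0,
                        max_reward=1.0, discount=0.99):
    # TD loss between current and target Q, as in the diff
    loss_target_Q = F.mse_loss(current_Q, target_Q)
    # Hypothetical upper bound on the discounted return (assumption)
    q_max = max_reward / (1.0 - discount)
    # Penalize only the portion of the prediction above the bound (assumption)
    max_bound_loss_Q = F.relu(current_Q - q_max).pow(2).mean()
    # The diff replaces the hardcoded 10 with the configurable bound_weight
    return loss_target_Q + bound_weight * max_bound_loss_Q

# Dummy tensors standing in for critic outputs
print(bounded_critic_loss(torch.randn(32, 1), torch.randn(32, 1)))
```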
BCNNTD3 model:

@@ -128,8 +128,10 @@ def __init__(
         save_directory=Path("robot_nav/models/BPG/checkpoint"),
         model_name="BCNNTD3",
         load_directory=Path("robot_nav/models/BPG/checkpoint"),
+        bound_weight=8
     ):
         # Initialize the Actor network
+        self.bound_weight = bound_weight
         self.device = device
         self.actor = Actor(action_dim).to(self.device)
         self.actor_target = Actor(action_dim).to(self.device)
@@ -244,7 +246,7 @@ def train(
         loss_target_Q = F.mse_loss(current_Q1, target_Q) + F.mse_loss(
             current_Q2, target_Q
         )
-        max_bound_loss = 10 * (max_bound_loss_Q1 + max_bound_loss_Q2)
+        max_bound_loss = self.bound_weight * (max_bound_loss_Q1 + max_bound_loss_Q2)
         loss = loss_target_Q + max_bound_loss
         # Perform the gradient descent
         self.critic_optimizer.zero_grad()
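Note that the new default (8) differs from the previously hardcoded 10, so untouched call sites now apply a smaller bound penalty. A hypothetical caller that wants the old multiplier back can pass it explicitly; the import path and the exact constructor signature below are assumptions, since the diff shows neither:

```python
from pathlib import Path

# Hypothetical import; the diff does not show the module layout or the
# full list of constructor arguments.
from robot_nav.models.BPG.BCNNTD3 import BCNNTD3

model = BCNNTD3(
    action_dim=2,          # assumed; matches Actor(action_dim) in the hunk
    device="cuda",
    save_directory=Path("robot_nav/models/BPG/checkpoint"),
    model_name="BCNNTD3",
    load_directory=Path("robot_nav/models/BPG/checkpoint"),
    bound_weight=10,       # restore the previously hardcoded multiplier
)
```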
BPG model:

@@ -65,8 +65,10 @@ def __init__(
         save_directory=Path("robot_nav/models/BPG/checkpoint"),
         model_name="BPG",
         load_directory=Path("robot_nav/models/BPG/checkpoint"),
+        bound_weight=8
     ):
         # Initialize the Actor network
+        self.bound_weight = bound_weight
         self.device = device
         self.actor = Actor(state_dim, action_dim).to(self.device)
         self.actor_target = Actor(state_dim, action_dim).to(self.device)
@@ -175,7 +177,7 @@ def train(
         # Calculate the loss between the current Q value and the target Q value
         loss_target_Q = F.mse_loss(current_Q, target_Q)

-        max_bound_loss = 10 * max_bound_loss
+        max_bound_loss = self.bound_weight * max_bound_loss
         loss = loss_target_Q + max_bound_loss
         # Perform the gradient descent
         self.critic_optimizer.zero_grad()
BTD3 model:

@@ -82,8 +82,10 @@ def __init__(
         save_directory=Path("robot_nav/models/BPG/checkpoint"),
         model_name="BTD3",
         load_directory=Path("robot_nav/models/BPG/checkpoint"),
+        bound_weight=8
     ):
         # Initialize the Actor network
+        self.bound_weight = bound_weight
         self.device = device
         self.actor = Actor(state_dim, action_dim).to(self.device)
         self.actor_target = Actor(state_dim, action_dim).to(self.device)
@@ -197,7 +199,7 @@ def train(
         loss_target_Q = F.mse_loss(current_Q1, target_Q) + F.mse_loss(
             current_Q2, target_Q
         )
-        max_bound_loss = 10 * (max_bound_loss_Q1 + max_bound_loss_Q2)
+        max_bound_loss = self.bound_weight * (max_bound_loss_Q1 + max_bound_loss_Q2)
         loss = loss_target_Q + max_bound_loss
         # Perform the gradient descent
         self.critic_optimizer.zero_grad()
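The twin-critic variants (BCNNTD3, BTD3) share a single weight across both bound penalties. A self-contained sketch of that combined loss, with dummy tensors and an assumed ReLU-style penalty, since the diff does not show how max_bound_loss_Q1/Q2 are produced:

```python
import torch
import torch.nn.functional as F

def twin_critic_loss(current_Q1, current_Q2, target_Q,
                     max_bound_loss_Q1, max_bound_loss_Q2, bound_weight=8.0):
    # Sum of both critics' TD errors, as in the BCNNTD3/BTD3 hunks above
    loss_target_Q = F.mse_loss(current_Q1, target_Q) + F.mse_loss(
        current_Q2, target_Q
    )
    # One shared, configurable weight on both bound penalties (was 10)
    return loss_target_Q + bound_weight * (max_bound_loss_Q1 + max_bound_loss_Q2)

# Dummy example: penalties assumed to be squared excess above a bound of 5.0
q1, q2, tq = torch.randn(32, 1), torch.randn(32, 1), torch.randn(32, 1)
b1 = F.relu(q1 - 5.0).pow(2).mean()
b2 = F.relu(q2 - 5.0).pow(2).mean()
print(twin_critic_loss(q1, q2, tq, b1, b2, bound_weight=8.0))
```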