Fix the bounds loss: compute it as the mean squared ReLU excess of Q over max_bound

reiniscimurs · reiniscimurs · commit 451e50b4d5e9 · 2025-04-02T20:29:21.000+02:00
diff --git a/robot_nav/models/BPG/BCNNPG.py b/robot_nav/models/BPG/BCNNPG.py
@@ -230,8 +230,8 @@ def train(
             max_b = max(max_b, torch.max(max_bound))
             av_bound += torch.mean(max_bound)
 
-            max_bound_Q = torch.min(current_Q, max_bound)
-            max_bound_loss = F.mse_loss(current_Q, max_bound_Q)
+            max_excess_Q = F.relu(current_Q - max_bound)
+            max_bound_loss = (max_excess_Q ** 2).mean()
             # Calculate the loss between the current Q value and the target Q value
             loss_target_Q = F.mse_loss(current_Q, target_Q)
 
diff --git a/robot_nav/models/BPG/BCNNTD3.py b/robot_nav/models/BPG/BCNNTD3.py
@@ -247,16 +247,16 @@ def train(
             )
             max_b += max(max_b, torch.max(max_bound))
             av_bound += torch.mean(max_bound)
-            max_bound_Q1 = torch.min(current_Q1, max_bound)
-            max_bound_loss_Q1 = F.mse_loss(current_Q1, max_bound_Q1)
-            max_bound_Q2 = torch.min(current_Q2, max_bound)
-            max_bound_loss_Q2 = F.mse_loss(current_Q2, max_bound_Q2)
+            max_excess_Q1 = F.relu(current_Q1 - max_bound)
+            max_bound_loss_Q1 = (max_excess_Q1 ** 2).mean()
+            max_excess_Q2 = F.relu(current_Q2 - max_bound)
+            max_bound_loss_Q2 = (max_excess_Q2 ** 2).mean()
+            max_bound_loss = self.bound_weight * (max_bound_loss_Q1 + max_bound_loss_Q2)
 
             # Calculate the loss between the current Q value and the target Q value
             loss_target_Q = F.mse_loss(current_Q1, target_Q) + F.mse_loss(
                 current_Q2, target_Q
             )
-            max_bound_loss = self.bound_weight * (max_bound_loss_Q1 + max_bound_loss_Q2)
             loss = loss_target_Q + max_bound_loss
             # Perform the gradient descent
             self.critic_optimizer.zero_grad()
diff --git a/robot_nav/models/BPG/BPG.py b/robot_nav/models/BPG/BPG.py
@@ -183,8 +183,8 @@ def train(
             max_b = max(max_b, torch.max(max_bound))
             av_bound += torch.mean(max_bound)
 
-            max_bound_Q = torch.min(current_Q, max_bound)
-            max_bound_loss = F.mse_loss(current_Q, max_bound_Q)
+            max_excess_Q = F.relu(current_Q - max_bound)
+            max_bound_loss = (max_excess_Q ** 2).mean()
             # Calculate the loss between the current Q value and the target Q value
             loss_target_Q = F.mse_loss(current_Q, target_Q)
 
diff --git a/robot_nav/models/BPG/BTD3.py b/robot_nav/models/BPG/BTD3.py
@@ -201,16 +201,16 @@ def train(
             )
             max_b += max(max_b, torch.max(max_bound))
             av_bound += torch.mean(max_bound)
-            max_bound_Q1 = torch.min(current_Q1, max_bound)
-            max_bound_loss_Q1 = F.mse_loss(current_Q1, max_bound_Q1)
-            max_bound_Q2 = torch.min(current_Q2, max_bound)
-            max_bound_loss_Q2 = F.mse_loss(current_Q2, max_bound_Q2)
+            max_excess_Q1 = F.relu(current_Q1 - max_bound)
+            max_bound_loss_Q1 = (max_excess_Q1 ** 2).mean()
+            max_excess_Q2 = F.relu(current_Q2 - max_bound)
+            max_bound_loss_Q2 = (max_excess_Q2 ** 2).mean()
+            max_bound_loss = self.bound_weight * (max_bound_loss_Q1 + max_bound_loss_Q2)
 
             # Calculate the loss between the current Q value and the target Q value
             loss_target_Q = F.mse_loss(current_Q1, target_Q) + F.mse_loss(
                 current_Q2, target_Q
             )
-            max_bound_loss = self.bound_weight * (max_bound_loss_Q1 + max_bound_loss_Q2)
             loss = loss_target_Q + max_bound_loss
             # Perform the gradient descent
             self.critic_optimizer.zero_grad()