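Working setup: enable the BCE loss term (bce_weight 0.0 -> 0.1 for critic and actor), train "phase1" from scratch (load_model=False), and switch get_reward back to the phase1 reward (phase2 reward commented out)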

reiniscimurs · reiniscimurs · commit bc5e0ae3bc00 · 2025-06-22T11:21:44.000+02:00
diff --git a/robot_nav/models/CNNTD3/att.py b/robot_nav/models/CNNTD3/att.py
@@ -443,7 +443,7 @@ def train(
             # else:
             #     bce_loss = torch.tensor(0.0, device=masked_weights.device)
 
-            bce_weight = 0.0
+            bce_weight = 0.1
             av_critic_bce_loss.append(bce_loss)
 
             critic_entropy_weight = 1  # or tuneable
@@ -473,7 +473,7 @@ def train(
                 # else:
                 #     bce_loss = torch.tensor(0.0, device=masked_weights.device)
 
-                bce_weight = 0.0
+                bce_weight = 0.1
                 av_actor_bce_loss.append(bce_loss)
 
                 actor_Q, _, _, _, _, _ = self.critic(state, action)
diff --git a/robot_nav/multi_train2.py b/robot_nav/multi_train2.py
@@ -56,8 +56,8 @@ def main(args=None):
         num_robots=sim.num_robots,
         device=device,
     save_every=save_every,
-    load_model=True,
-    model_name="phase2",
+    load_model=False,
+    model_name="phase1",
     load_model_name="phase1"
     )  # instantiate a model
 
diff --git a/robot_nav/sim2.py b/robot_nav/sim2.py
@@ -249,23 +249,8 @@ def get_reward(goal, collision, action, closest_robots, distance):
             # return 2*action[0] - abs(action[1]) - cl_pen + r_dist
 
         # phase1
-        # if goal:
-        #     return 100.0
-        # elif collision:
-        #     return -100.0
-        # else:
-        #     r_dist = 1.5/distance
-        #     cl_pen = 0
-        #     for rob in closest_robots:
-        #         add = 1.5 - rob if rob < 1.5 else 0
-        #         cl_pen += add
-        #
-        #     return action[0] - 0.5 * abs(action[1])-cl_pen + r_dist
-
-
-        # phase2
         if goal:
-            return 80.0
+            return 100.0
         elif collision:
             return -100.0
         else:
@@ -275,5 +260,20 @@ def get_reward(goal, collision, action, closest_robots, distance):
                 add = 1.5 - rob if rob < 1.5 else 0
                 cl_pen += add
 
-            return -0.5*abs(action[1])-cl_pen
+            return action[0] - 0.5 * abs(action[1])-cl_pen + r_dist
+
+
+        # phase2
+        # if goal:
+        #     return 80.0
+        # elif collision:
+        #     return -100.0
+        # else:
+        #     r_dist = 1.5/distance
+        #     cl_pen = 0
+        #     for rob in closest_robots:
+        #         add = 1.5 - rob if rob < 1.5 else 0
+        #         cl_pen += add
+        #
+        #     return -0.5*abs(action[1])-cl_pen