Skip to content

Commit 9c2f5eb

Browse files
committed
update added noise
1 parent 62ded8e commit 9c2f5eb

File tree

3 files changed

+33
-17
lines changed

3 files changed

+33
-17
lines changed

robot_nav/models/CNNTD3/att.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -346,7 +346,9 @@ def get_action(self, obs, add_noise):
346346
"""
347347
action, connection, combined_weights = self.act(obs)
348348
if add_noise:
349-
action = (action + np.random.normal(0, 0.1, size=action.shape)
349+
noise = np.random.normal(0, 0.4, size=action.shape)
350+
noise = [n/4 if i%2 else n for i, n in enumerate(noise)]
351+
action = (action + noise
350352
).clip(-self.max_action, self.max_action)
351353

352354
return action.reshape(-1, 2), connection, combined_weights

robot_nav/multi_train2.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -56,9 +56,9 @@ def main(args=None):
5656
num_robots=sim.num_robots,
5757
device=device,
5858
save_every=save_every,
59-
load_model=False,
60-
model_name="phase1",
61-
load_model_name="phase1"
59+
load_model=True,
60+
model_name="phase3",
61+
load_model_name="phase2"
6262
) # instantiate a model
6363

6464
replay_buffer = get_buffer(

robot_nav/sim2.py

Lines changed: 27 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -249,23 +249,23 @@ def get_reward(goal, collision, action, closest_robots, distance):
249249
# return 2*action[0] - abs(action[1]) - cl_pen + r_dist
250250

251251
# phase1
252-
if goal:
253-
return 100.0
254-
elif collision:
255-
return -100.0
256-
else:
257-
r_dist = 1.5/distance
258-
cl_pen = 0
259-
for rob in closest_robots:
260-
add = 1.5 - rob if rob < 1.5 else 0
261-
cl_pen += add
262-
263-
return action[0] - 0.5 * abs(action[1])-cl_pen + r_dist
252+
# if goal:
253+
# return 100.0
254+
# elif collision:
255+
# return -100.0
256+
# else:
257+
# r_dist = 1.5/distance
258+
# cl_pen = 0
259+
# for rob in closest_robots:
260+
# add = 1.5 - rob if rob < 1.5 else 0
261+
# cl_pen += add
262+
#
263+
# return action[0] - 0.5 * abs(action[1])-cl_pen + r_dist
264264

265265

266266
# phase2
267267
# if goal:
268-
# return 80.0
268+
# return 100.0
269269
# elif collision:
270270
# return -100.0
271271
# else:
@@ -277,3 +277,17 @@ def get_reward(goal, collision, action, closest_robots, distance):
277277
#
278278
# return -0.5*abs(action[1])-cl_pen
279279

280+
# phase3
281+
if goal:
282+
return 100.0
283+
elif collision:
284+
return -100.0 * 3 * action[0]
285+
else:
286+
r_dist = 1.5 / distance
287+
cl_pen = 0
288+
for rob in closest_robots:
289+
add = 1.5 - rob if rob < 1.5 else 0
290+
cl_pen += add
291+
292+
return -0.5 * abs(action[1]) - cl_pen
293+

0 commit comments

Comments (0)