working phase 2

reiniscimurs · reiniscimurs · commit 7af52b37768f · 2025-06-27T22:56:03.000+02:00
diff --git a/robot_nav/models/CNNTD3/att.py b/robot_nav/models/CNNTD3/att.py
@@ -32,8 +32,8 @@ def __init__(self, embedding_dim):
 
         # Soft attention projections
         self.q = nn.Linear(embedding_dim, embedding_dim, bias=False)
-        self.k = nn.Linear(7, embedding_dim, bias=False)
-        self.v = nn.Linear(7, embedding_dim)
+        self.k = nn.Linear(9, embedding_dim, bias=False)
+        self.v = nn.Linear(9, embedding_dim)
 
         # Soft attention score network (with distance)
         self.attn_score_layer = nn.Sequential(
@@ -42,7 +42,6 @@ def __init__(self, embedding_dim):
             nn.Linear(embedding_dim, 1)
         )
 
-        self.v_proj = nn.Linear(7, embedding_dim)
         # Decoder
         self.decode_1 = nn.Linear(embedding_dim * 2, embedding_dim * 2)
         nn.init.kaiming_uniform_(self.decode_1.weight, nonlinearity="leaky_relu")
@@ -59,10 +58,14 @@ def forward(self, embedding):
             embedding = embedding.unsqueeze(0)
         batch_size, n_agents, _ = embedding.shape
 
-        embed = embedding[:, :, 4:].reshape(batch_size * n_agents, -1)
+        embed = embedding[:, :, 4:9].reshape(batch_size * n_agents, -1)
         position = embedding[:, :, :2].reshape(batch_size, n_agents, 2)
         heading = embedding[:, :, 2:4].reshape(batch_size, n_agents, 2)  # assume (cos(θ), sin(θ))
-        action = embedding[:, :, -2:].reshape(batch_size, n_agents, 2)
+        action = embedding[:, :, 7:9].reshape(batch_size, n_agents, 2)
+        goal = embedding[:, :, -2:].reshape(batch_size, n_agents, 2)
+        goal_j = goal.unsqueeze(1).expand(-1, n_agents, -1, -1)  # (B, N, N, 2)
+        pos_i = position.unsqueeze(2)  # (B, N, 1, 2)
+        rel_goal = goal_j - pos_i
 
         agent_embed = self.encode_agent_features(embed)
         agent_embed = agent_embed.view(batch_size, n_agents, self.embedding_dim)
@@ -126,11 +129,12 @@ def forward(self, embedding):
         attention_outputs = []
         entropy_list = []
         combined_w = []
+        soft_edge_features = torch.cat([edge_features, rel_goal], dim=-1)
         for i in range(n_agents):
             q_i = q[:, i:i + 1, :]  # (B, 1, D)
             mask = torch.ones(n_agents, dtype=torch.bool, device=edge_features.device)
             mask[i] = False
-            edge_i_wo_self = edge_features[:, i, mask, :]
+            edge_i_wo_self = soft_edge_features[:, i, mask, :]
-            edge_i_wo_self = edge_i_wo_self.squeeze(1)  # (B, N-1, 7)
+            edge_i_wo_self = edge_i_wo_self.squeeze(1)  # (B, N-1, 9)
             k = F.leaky_relu(self.k(edge_i_wo_self))
 
@@ -167,7 +171,6 @@ def forward(self, embedding):
             entropy_list.append(entropy)
 
             # Project each other agent's features to embedding dim *before* the attention-weighted sum
-            # v_j = self.v_proj(edge_i_wo_self)  # (B, N-1, embedding_dim)
             v_j = F.leaky_relu(self.v(edge_i_wo_self))
             attn_output = torch.bmm(combined_weights, v_j).squeeze(1)  # (B, embedding_dim)
             attention_outputs.append(attn_output)
@@ -346,7 +349,7 @@ def get_action(self, obs, add_noise):
         """
         action, connection, combined_weights = self.act(obs)
         if add_noise:
-            noise = np.random.normal(0, 0.4, size=action.shape)
+            noise = np.random.normal(0, 0.5, size=action.shape)
             noise = [n/4 if i%2 else n for i, n in enumerate(noise)]
             action = (action + noise
             ).clip(-self.max_action, self.max_action)
@@ -609,7 +612,7 @@ def prepare_state(self, poses, distance, cos, sin, collision, goal, action, posi
             ang_vel = (act[1] + 1) / 2  # Assuming original range [-1, 1]
 
             # Final state vector
-            state = [x, y, heading_cos, heading_sin, distance[i]/17, cos[i], sin[i], lin_vel, ang_vel]
+            state = [x, y, heading_cos, heading_sin, distance[i]/17, cos[i], sin[i], lin_vel, ang_vel, gx, gy]
 
             assert len(state) == self.state_dim, f"State length mismatch: expected {self.state_dim}, got {len(state)}"
             states.append(state)
diff --git a/robot_nav/multi_train2.py b/robot_nav/multi_train2.py
@@ -24,7 +24,7 @@ def main(args=None):
     """Main training function"""
     action_dim = 2  # number of actions produced by the model
     max_action = 1  # maximum absolute value of output actions
-    state_dim = 9  # number of input values in the neural network (vector length of state input)
+    state_dim = 11  # number of input values in the neural network (vector length of state input)
     device = torch.device(
         "cuda" if torch.cuda.is_available() else "cpu"
     )  # using cuda if it is available, cpu otherwise
@@ -57,8 +57,8 @@ def main(args=None):
         device=device,
     save_every=save_every,
     load_model=True,
-    model_name="phase3",
-    load_model_name="phase2"
+    model_name="phase2",
+    load_model_name="phase1"
     )  # instantiate a model
 
     replay_buffer = get_buffer(
diff --git a/robot_nav/sim2.py b/robot_nav/sim2.py
@@ -252,7 +252,7 @@ def get_reward(goal, collision, action, closest_robots, distance):
         # if goal:
         #     return 100.0
         # elif collision:
-        #     return -100.0
+        #     return -100.0 * 3 * action[0]
         # else:
         #     r_dist = 1.5/distance
         #     cl_pen = 0
@@ -279,14 +279,14 @@ def get_reward(goal, collision, action, closest_robots, distance):
 
         # phase3
         if goal:
-            return 100.0
+            return 70.0
         elif collision:
             return -100.0 * 3 * action[0]
         else:
             r_dist = 1.5 / distance
             cl_pen = 0
             for rob in closest_robots:
-                add = 1.5 - rob if rob < 1.5 else 0
+                add = 2.5 - rob if rob < 2.5 else 0
                 cl_pen += add
 
             return -0.5 * abs(action[1]) - cl_pen