Skip to content

Commit 7af52b3

Browse files
committed
working phase 2
1 parent 9c2f5eb commit 7af52b3

File tree

3 files changed

+18
-15
lines changed

3 files changed

+18
-15
lines changed

robot_nav/models/CNNTD3/att.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,8 @@ def __init__(self, embedding_dim):
3232

3333
# Soft attention projections
3434
self.q = nn.Linear(embedding_dim, embedding_dim, bias=False)
35-
self.k = nn.Linear(7, embedding_dim, bias=False)
36-
self.v = nn.Linear(7, embedding_dim)
35+
self.k = nn.Linear(9, embedding_dim, bias=False)
36+
self.v = nn.Linear(9, embedding_dim)
3737

3838
# Soft attention score network (with distance)
3939
self.attn_score_layer = nn.Sequential(
@@ -42,7 +42,6 @@ def __init__(self, embedding_dim):
4242
nn.Linear(embedding_dim, 1)
4343
)
4444

45-
self.v_proj = nn.Linear(7, embedding_dim)
4645
# Decoder
4746
self.decode_1 = nn.Linear(embedding_dim * 2, embedding_dim * 2)
4847
nn.init.kaiming_uniform_(self.decode_1.weight, nonlinearity="leaky_relu")
@@ -59,10 +58,14 @@ def forward(self, embedding):
5958
embedding = embedding.unsqueeze(0)
6059
batch_size, n_agents, _ = embedding.shape
6160

62-
embed = embedding[:, :, 4:].reshape(batch_size * n_agents, -1)
61+
embed = embedding[:, :, 4:9].reshape(batch_size * n_agents, -1)
6362
position = embedding[:, :, :2].reshape(batch_size, n_agents, 2)
6463
heading = embedding[:, :, 2:4].reshape(batch_size, n_agents, 2) # assume (cos(θ), sin(θ))
65-
action = embedding[:, :, -2:].reshape(batch_size, n_agents, 2)
64+
action = embedding[:, :, 7:9].reshape(batch_size, n_agents, 2)
65+
goal = embedding[:, :, -2:].reshape(batch_size, n_agents, 2)
66+
goal_j = goal.unsqueeze(1).expand(-1, n_agents, -1, -1) # (B, N, N, 2)
67+
pos_i = position.unsqueeze(2) # (B, N, 1, 2)
68+
rel_goal = goal_j - pos_i
6669

6770
agent_embed = self.encode_agent_features(embed)
6871
agent_embed = agent_embed.view(batch_size, n_agents, self.embedding_dim)
@@ -126,11 +129,12 @@ def forward(self, embedding):
126129
attention_outputs = []
127130
entropy_list = []
128131
combined_w = []
132+
soft_edge_features = torch.cat([edge_features, rel_goal], dim=-1)
129133
for i in range(n_agents):
130134
q_i = q[:, i:i + 1, :] # (B, 1, D)
131135
mask = torch.ones(n_agents, dtype=torch.bool, device=edge_features.device)
132136
mask[i] = False
133-
edge_i_wo_self = edge_features[:, i, mask, :]
137+
edge_i_wo_self = soft_edge_features[:, i, mask, :]
134138
edge_i_wo_self = edge_i_wo_self.squeeze(1) # (B, N-1, 9): 7 edge features + 2 rel_goal
135139
k = F.leaky_relu(self.k(edge_i_wo_self))
136140

@@ -167,7 +171,6 @@ def forward(self, embedding):
167171
entropy_list.append(entropy)
168172

169173
# Project each other agent's features to embedding dim *before* the attention-weighted sum
170-
# v_j = self.v_proj(edge_i_wo_self) # (B, N-1, embedding_dim)
171174
v_j = F.leaky_relu(self.v(edge_i_wo_self))
172175
attn_output = torch.bmm(combined_weights, v_j).squeeze(1) # (B, embedding_dim)
173176
attention_outputs.append(attn_output)
@@ -346,7 +349,7 @@ def get_action(self, obs, add_noise):
346349
"""
347350
action, connection, combined_weights = self.act(obs)
348351
if add_noise:
349-
noise = np.random.normal(0, 0.4, size=action.shape)
352+
noise = np.random.normal(0, 0.5, size=action.shape)
350353
noise = [n/4 if i%2 else n for i, n in enumerate(noise)]
351354
action = (action + noise
352355
).clip(-self.max_action, self.max_action)
@@ -609,7 +612,7 @@ def prepare_state(self, poses, distance, cos, sin, collision, goal, action, posi
609612
ang_vel = (act[1] + 1) / 2 # Assuming original range [-1, 1]
610613

611614
# Final state vector
612-
state = [x, y, heading_cos, heading_sin, distance[i]/17, cos[i], sin[i], lin_vel, ang_vel]
615+
state = [x, y, heading_cos, heading_sin, distance[i]/17, cos[i], sin[i], lin_vel, ang_vel, gx, gy]
613616

614617
assert len(state) == self.state_dim, f"State length mismatch: expected {self.state_dim}, got {len(state)}"
615618
states.append(state)

robot_nav/multi_train2.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def main(args=None):
2424
"""Main training function"""
2525
action_dim = 2 # number of actions produced by the model
2626
max_action = 1 # maximum absolute value of output actions
27-
state_dim = 9 # number of input values in the neural network (vector length of state input)
27+
state_dim = 11 # number of input values in the neural network (vector length of state input)
2828
device = torch.device(
2929
"cuda" if torch.cuda.is_available() else "cpu"
3030
) # using cuda if it is available, cpu otherwise
@@ -57,8 +57,8 @@ def main(args=None):
5757
device=device,
5858
save_every=save_every,
5959
load_model=True,
60-
model_name="phase3",
61-
load_model_name="phase2"
60+
model_name="phase2",
61+
load_model_name="phase1"
6262
) # instantiate a model
6363

6464
replay_buffer = get_buffer(

robot_nav/sim2.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,7 @@ def get_reward(goal, collision, action, closest_robots, distance):
252252
# if goal:
253253
# return 100.0
254254
# elif collision:
255-
# return -100.0
255+
# return -100.0 * 3 * action[0]
256256
# else:
257257
# r_dist = 1.5/distance
258258
# cl_pen = 0
@@ -279,14 +279,14 @@ def get_reward(goal, collision, action, closest_robots, distance):
279279

280280
# phase3
281281
if goal:
282-
return 100.0
282+
return 70.0
283283
elif collision:
284284
return -100.0 * 3 * action[0]
285285
else:
286286
r_dist = 1.5 / distance
287287
cl_pen = 0
288288
for rob in closest_robots:
289-
add = 1.5 - rob if rob < 1.5 else 0
289+
add = 2.5 - rob if rob < 2.5 else 0
290290
cl_pen += add
291291

292292
return -0.5 * abs(action[1]) - cl_pen

0 commit comments

Comments
 (0)