6 changes: 6 additions & 0 deletions docs/api/IR-SIM/ir-marl-sim.md
@@ -0,0 +1,6 @@
# MARL-IR-SIM

::: robot_nav.SIM_ENV.marl_sim
options:
show_root_heading: true
show_source: true
2 changes: 1 addition & 1 deletion docs/api/IR-SIM/ir-sim.md
@@ -1,6 +1,6 @@
# IR-SIM

::: robot_nav.sim
::: robot_nav.SIM_ENV.sim
options:
show_root_heading: true
show_source: true
6 changes: 6 additions & 0 deletions docs/api/models/MARL/Attention.md
@@ -0,0 +1,6 @@
# Hard-Soft Attention

::: robot_nav.models.MARL.hardsoftAttention
options:
show_root_heading: true
show_source: true
6 changes: 6 additions & 0 deletions docs/api/models/MARL/TD3.md
@@ -0,0 +1,6 @@
# MARL TD3

::: robot_nav.models.MARL.marlTD3
options:
show_root_heading: true
show_source: true
7 changes: 6 additions & 1 deletion mkdocs.yml
@@ -4,7 +4,9 @@ extra:
nav:
- Home: index.md
- API Reference:
IR-SIM: api/IR-SIM/ir-sim
IR-SIM:
- SIM: api/IR-SIM/ir-sim
- MARL SIM: api/IR-SIM/ir-marl-sim
Models:
- DDPG: api/models/DDPG.md
- TD3: api/models/TD3.md
@@ -13,6 +15,9 @@ nav:
- HCM: api/models/HCM.md
- PPO: api/models/PPO.md
- SAC: api/models/SAC.md
- MARL:
- HardSoft Attention: api/models/MARL/Attention
- TD3: api/models/MARL/TD3
Training:
- Train: api/Training/train.md
- Train RNN: api/Training/trainrnn.md
78 changes: 51 additions & 27 deletions robot_nav/SIM_ENV/marl_sim.py
@@ -8,23 +8,26 @@

class MARL_SIM(SIM_ENV):
"""
A simulation environment interface for robot navigation using IRSim in MARL setting.
Simulation environment for multi-agent robot navigation using IRSim.

This class wraps around the IRSim environment and provides methods for stepping,
resetting, and interacting with mobile robots, including reward computation.
This class extends the SIM_ENV and provides a wrapper for multi-robot
simulation and interaction, supporting reward computation and custom reset logic.

Attributes:
env (object): The simulation environment instance from IRSim.
robot_goal (np.ndarray): The goal position of the robot.
env (object): IRSim simulation environment instance.
robot_goal (np.ndarray): Current goal position(s) for the robots.
num_robots (int): Number of robots in the environment.
x_range (tuple): World x-range.
y_range (tuple): World y-range.
"""

def __init__(self, world_file="multi_robot_world.yaml", disable_plotting=False):
"""
Initialize the simulation environment.
Initialize the MARL_SIM environment.

Args:
world_file (str): Path to the world configuration YAML file.
disable_plotting (bool): If True, disables rendering and plotting.
world_file (str, optional): Path to the world configuration YAML file.
disable_plotting (bool, optional): If True, disables IRSim rendering and plotting.
"""
display = False if disable_plotting else True
self.env = irsim.make(
@@ -38,15 +41,26 @@ def __init__(self, world_file="multi_robot_world.yaml", disable_plotting=False):

def step(self, action, connection, combined_weights=None):
"""
Perform one step in the simulation using the given control commands.
Perform a simulation step for all robots using the provided actions and connections.

Args:
lin_velocity (float): Linear velocity to apply to the robot.
ang_velocity (float): Angular velocity to apply to the robot.
action (list): List of actions for each robot [[lin_vel, ang_vel], ...].
connection (Tensor): Tensor of shape (num_robots, num_robots-1) containing logits indicating connections between robots.
combined_weights (Tensor or None, optional): Optional weights for each connection, shape (num_robots, num_robots-1).

Returns:
(tuple): Contains the latest LIDAR scan, distance to goal, cosine and sine of angle to goal,
collision flag, goal reached flag, applied action, and computed reward.
tuple: (
poses (list): List of [x, y, theta] for each robot,
distances (list): Distance to goal for each robot,
coss (list): Cosine of angle to goal for each robot,
sins (list): Sine of angle to goal for each robot,
collisions (list): Collision status for each robot,
goals (list): Goal reached status for each robot,
action (list): Actions applied,
rewards (list): Rewards computed,
positions (list): Current [x, y] for each robot,
goal_positions (list): Goal [x, y] for each robot,
)
"""
self.env.step(action_id=[i for i in range(self.num_robots)], action=action)
self.env.render()
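
For orientation, here is a minimal sketch of driving the `step()` interface documented above. The class name, module path (`robot_nav/SIM_ENV/marl_sim.py`), attribute `num_robots`, and the return ordering come from this diff; the world file, velocities, and the random logits standing in for a policy output are placeholders.

```python
# Minimal sketch, not the repository's training code: random logits stand in
# for the attention/policy output that would normally produce `connection`.
import torch

from robot_nav.SIM_ENV.marl_sim import MARL_SIM

sim = MARL_SIM(world_file="multi_robot_world.yaml", disable_plotting=True)

# One [linear_velocity, angular_velocity] pair per robot.
action = [[0.5, 0.0] for _ in range(sim.num_robots)]

# Hard-attention logits toward every other robot, shape (num_robots, num_robots - 1).
connection = torch.randn(sim.num_robots, sim.num_robots - 1)

(poses, distances, coss, sins, collisions, goals,
 applied_action, rewards, positions, goal_positions) = sim.step(action, connection)
```
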
@@ -166,17 +180,27 @@ def reset(
random_obstacle_ids=None,
):
"""
Reset the simulation environment, optionally setting robot and obstacle states.
Reset the simulation environment and optionally set robot and obstacle positions.

Args:
robot_state (list or None): Initial state of the robot as a list of [x, y, theta, speed].
robot_goal (list or None): Goal state for the robot.
random_obstacles (bool): Whether to randomly reposition obstacles.
random_obstacle_ids (list or None): Specific obstacle IDs to randomize.
robot_state (list or None, optional): Initial state for robots as [x, y, theta, speed].
robot_goal (list or None, optional): Goal position(s) for the robots.
random_obstacles (bool, optional): If True, randomly position obstacles.
random_obstacle_ids (list or None, optional): IDs of obstacles to randomize.

Returns:
(tuple): Initial observation after reset, including LIDAR scan, distance, cos/sin,
and reward-related flags and values.
tuple: (
poses (list): List of [x, y, theta] for each robot,
distances (list): Distance to goal for each robot,
coss (list): Cosine of angle to goal for each robot,
sins (list): Sine of angle to goal for each robot,
collisions (list): All False after reset,
goals (list): All False after reset,
action (list): Initial action ([[0.0, 0.0], ...]),
rewards (list): Rewards for initial state,
positions (list): Initial [x, y] for each robot,
goal_positions (list): Initial goal [x, y] for each robot,
)
"""
if robot_state is None:
robot_state = [[random.uniform(3, 9)], [random.uniform(3, 9)], [0]]
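
A sketch of how `reset()` and `step()` might be combined into a bare episode loop, assuming all `reset()` arguments keep their documented defaults; the step budget, velocities, and termination check are placeholders.

```python
# Bare episode skeleton around reset()/step(); a sketch only, the real training
# scripts in the repository may differ.
import torch

from robot_nav.SIM_ENV.marl_sim import MARL_SIM

sim = MARL_SIM(disable_plotting=True)
obs = sim.reset(random_obstacles=True)

for _ in range(300):  # arbitrary step budget
    connection = torch.randn(sim.num_robots, sim.num_robots - 1)  # placeholder policy output
    action = [[0.5, 0.0] for _ in range(sim.num_robots)]
    obs = sim.step(action, connection)
    collisions, goals = obs[4], obs[5]  # indices per the documented return tuple
    if any(collisions) or all(goals):
        obs = sim.reset()
```
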
@@ -254,18 +278,18 @@ def reset(
@staticmethod
def get_reward(goal, collision, action, closest_robots, distance, phase=1):
"""
Calculate the reward for the current step.
Calculate the reward for a robot given the current state and action.

Args:
goal (bool): Whether the goal has been reached.
goal (bool): Whether the robot reached its goal.
collision (bool): Whether a collision occurred.
action (list): The action taken [linear velocity, angular velocity].
closest_robots (list): Distances to the closest robots.
distance (float): Distance to goal.
phase (int, optional): Reward function phase. Defaults to 1.
action (list): [linear_velocity, angular_velocity] applied.
closest_robots (list): Distances to the closest other robots.
distance (float): Distance to the goal.
phase (int, optional): Reward phase/function selector (default: 1).

Returns:
(float): Computed reward for the current state.
float: Computed reward.
"""

match phase:
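
The body of `get_reward` is collapsed in this diff, so the snippet below is a purely hypothetical stand-in. It only mirrors the documented signature and the kinds of terms the docstring names (goal bonus, collision penalty, action and proximity shaping); none of the constants or phase behaviour come from the repository.

```python
# Hypothetical illustration only -- the actual phase-dependent reward is not
# shown in this diff. All constants below are invented.
def get_reward(goal, collision, action, closest_robots, distance, phase=1):
    if goal:
        return 100.0                       # terminal bonus for reaching the goal
    if collision:
        return -100.0                      # terminal penalty for a collision
    lin_vel, ang_vel = action
    reward = lin_vel - 0.5 * abs(ang_vel)  # favour forward motion over spinning
    reward -= sum(max(0.0, 1.0 - d) for d in closest_robots)  # penalize crowding
    if phase > 1:                          # later phases add goal-distance shaping (assumed)
        reward -= 0.1 * distance
    return reward
```
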
Empty file.
73 changes: 65 additions & 8 deletions robot_nav/models/MARL/hardsoftAttention.py
@@ -5,7 +5,33 @@


class Attention(nn.Module):
"""
Multi-robot attention mechanism for learning hard and soft attentions.

This module provides both hard (binary) and soft (weighted) attention,
combining feature encoding, relative pose and goal geometry, and
message passing between agents.

Args:
embedding_dim (int): Dimension of the agent embedding vector.

Attributes:
embedding1 (nn.Linear): First layer for agent feature encoding.
embedding2 (nn.Linear): Second layer for agent feature encoding.
hard_mlp (nn.Sequential): MLP to process concatenated agent and edge features.
hard_encoding (nn.Linear): Outputs logits for hard (binary) attention.
q, k, v (nn.Linear): Layers for query, key, value projections for soft attention.
attn_score_layer (nn.Sequential): Computes unnormalized attention scores for each pair.
decode_1, decode_2 (nn.Linear): Decoding layers to produce the final attended embedding.
"""

def __init__(self, embedding_dim):
"""
Initialize attention mechanism for multi-agent communication.

Args:
embedding_dim (int): Output embedding dimension per agent.
"""
super(Attention, self).__init__()
self.embedding_dim = embedding_dim
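
A minimal usage sketch of the module, assuming the import path from the mkdocstrings entry (`robot_nav.models.MARL.hardsoftAttention`) and the `(B, N, D)` input layout with `D = 11` described in the `forward()` docstring further down; the batch size, agent count, and `embedding_dim` are arbitrary.

```python
# Illustrative forward pass with random per-agent features; shapes follow the
# forward() docstring, all concrete numbers are arbitrary.
import torch

from robot_nav.models.MARL.hardsoftAttention import Attention

attn = Attention(embedding_dim=64)
x = torch.rand(8, 4, 11)  # (batch, agents, features): position, heading cos/sin, internal, goal

(att_embedding, hard_logits, unnorm_rel_dist,
 mean_entropy, hard_weights, comb_w) = attn(x)

print(att_embedding.shape)  # expected (8 * 4, 2 * 64) per the docstring
```
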

@@ -41,30 +67,59 @@ def __init__(self, embedding_dim):
nn.init.kaiming_uniform_(self.decode_2.weight, nonlinearity="leaky_relu")

def encode_agent_features(self, embed):
"""
Encode agent features using a small MLP.

Args:
embed (Tensor): Input features (B*N, 5).

Returns:
Tensor: Encoded embedding (B*N, embedding_dim).
"""
embed = F.leaky_relu(self.embedding1(embed))
embed = F.leaky_relu(self.embedding2(embed))
return embed

def forward(self, embedding):
"""
Forward pass: computes both hard and soft attentions among agents,
produces the attended embedding for each agent, as well as diagnostic info.

Args:
embedding (Tensor): Input tensor of shape (B, N, D), where D is at least 11.

Returns:
tuple:
att_embedding (Tensor): Final attended embedding, shape (B*N, 2*embedding_dim).
hard_logits (Tensor): Logits for hard attention, (B*N, N-1).
unnorm_rel_dist (Tensor): Pairwise distances between agents (not normalized), (B*N, N-1, 1).
mean_entropy (Tensor): Mean entropy of soft attention distributions.
hard_weights (Tensor): Binary hard attention mask, (B, N, N-1).
comb_w (Tensor): Final combined attention weights, (N, N*(N-1)).
"""
if embedding.dim() == 2:
embedding = embedding.unsqueeze(0)
batch_size, n_agents, _ = embedding.shape

# Extract sub-features
embed = embedding[:, :, 4:9].reshape(batch_size * n_agents, -1)
position = embedding[:, :, :2].reshape(batch_size, n_agents, 2)
heading = embedding[:, :, 2:4].reshape(
batch_size, n_agents, 2
) # assume (cos(θ), sin(θ))
action = embedding[:, :, 7:9].reshape(batch_size, n_agents, 2)
goal = embedding[:, :, -2:].reshape(batch_size, n_agents, 2)

# Compute pairwise relative goal vectors (for each i,j)
goal_j = goal.unsqueeze(1).expand(-1, n_agents, -1, -1)
pos_i = position.unsqueeze(2)
goal_rel_vec = goal_j - pos_i

# Encode agent features
agent_embed = self.encode_agent_features(embed)
agent_embed = agent_embed.view(batch_size, n_agents, self.embedding_dim)

# For hard attention
# Prep for hard attention: compute all relative geometry for each agent pair
h_i = agent_embed.unsqueeze(2) # (B, N, 1, D)
pos_i = position.unsqueeze(2) # (B, N, 1, 2)
pos_j = position.unsqueeze(1) # (B, 1, N, 2)
@@ -88,7 +143,7 @@ def forward(self, embedding):
heading_j_cos = heading_j[..., 0] # (B, 1, N)
heading_j_sin = heading_j[..., 1] # (B, 1, N)

# Stack edge features
# Edge features for hard attention
edge_features = torch.cat(
[
rel_dist, # (B, N, N, 1)
@@ -101,7 +156,7 @@ def forward(self, embedding):
dim=-1,
)

# Broadcast h_i along N (for each pair)
# Broadcast agent embedding for all pairs (except self-pairs)
h_i_expanded = h_i.expand(-1, -1, n_agents, -1)

# Remove self-pairs using mask
@@ -129,13 +184,14 @@ def forward(self, embedding):
batch_size * n_agents, n_agents - 1, 1
)

# Soft attention
# ---- Soft attention computation ----
q = self.q(agent_embed)

attention_outputs = []
entropy_list = []
combined_w = []

# Goal-relative polar features for soft attention
goal_rel_dist = torch.linalg.vector_norm(goal_rel_vec, dim=-1, keepdim=True)
goal_angle_global = torch.atan2(goal_rel_vec[..., 1], goal_rel_vec[..., 0])
heading_angle = torch.atan2(heading_i[..., 1], heading_i[..., 0])
@@ -147,6 +203,7 @@ def forward(self, embedding):
[goal_rel_dist, goal_rel_angle_cos, goal_rel_angle_sin], dim=-1
)

# Soft attention edge features (include goal polar)
soft_edge_features = torch.cat([edge_features, goal_polar], dim=-1)
for i in range(n_agents):
q_i = q[:, i : i + 1, :]
@@ -159,10 +216,10 @@ def forward(self, embedding):
q_i_expanded = q_i.expand(-1, n_agents - 1, -1)
attention_input = torch.cat([q_i_expanded, k], dim=-1)

# Score computation
# Score computation (per pair)
scores = self.attn_score_layer(attention_input).transpose(1, 2)

# Mask using hard weights
# Mask using hard attention
h_weights = hard_weights[:, i].unsqueeze(1)
mask = (h_weights > 0.5).float()

Expand All @@ -183,12 +240,12 @@ def forward(self, embedding):
combined_weights = soft_weights * mask # (B, 1, N-1)
combined_w.append(combined_weights)

# Normalize combined_weights for entropy calculation
# Normalize for entropy calculation
combined_weights_norm = combined_weights / (
combined_weights.sum(dim=-1, keepdim=True) + epsilon
)

# Calculate entropy from combined_weights
# Entropy for analysis/logging
entropy = (
-(combined_weights_norm * (combined_weights_norm + epsilon).log())
.sum(dim=-1)
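
For clarity, here is a self-contained sketch of the hard-mask / renormalize / entropy pattern used in the loop above; the softmax over scores, the epsilon value, and the shapes are placeholders rather than the module's exact configuration.

```python
# Generic illustration of combining soft weights with a hard mask and logging
# the entropy of the renormalized distribution; not the module's exact code.
import torch

epsilon = 1e-6
scores = torch.randn(1, 1, 3)                   # unnormalized scores toward 3 neighbours
hard_mask = torch.tensor([[[1.0, 0.0, 1.0]]])   # hard attention keeps neighbours 0 and 2

soft_weights = torch.softmax(scores, dim=-1)    # softmax is assumed here
combined = soft_weights * hard_mask             # zero out masked-off neighbours

# Renormalize the kept weights before measuring how spread out they are.
combined_norm = combined / (combined.sum(dim=-1, keepdim=True) + epsilon)
entropy = -(combined_norm * (combined_norm + epsilon).log()).sum(dim=-1).mean()
```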