Skip to content

Commit 1210136

Browse files
authored
Merge pull request #15 from reiniscimurs/feature/hard_entropy
Create IGA MARL method
2 parents 6981562 + 0699d52 commit 1210136

25 files changed

+2696
-249
lines changed

poetry.lock

Lines changed: 665 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ dependencies = [
1717
"mkdocstrings[python] (>=0.29.1,<0.30.0)",
1818
"mkdocs-material (>=9.6.11,<10.0.0)",
1919
"scipy (>=1.15.2,<2.0.0)",
20+
"torch-geometric (>=2.6.1,<3.0.0)",
2021
]
2122

2223
[tool.pytest.ini_options]

robot_nav/SIM_ENV/marl_sim.py

Lines changed: 84 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,25 @@ class MARL_SIM(SIM_ENV):
2121
y_range (tuple): World y-range.
2222
"""
2323

24-
def __init__(self, world_file="multi_robot_world.yaml", disable_plotting=False):
24+
def __init__(
25+
self,
26+
world_file="multi_robot_world.yaml",
27+
disable_plotting=False,
28+
reward_phase=1,
29+
):
2530
"""
2631
Initialize the MARL_SIM environment.
2732
2833
Args:
29-
world_file (str, optional): Path to the world configuration YAML file.
30-
disable_plotting (bool, optional): If True, disables IRSim rendering and plotting.
34+
world_file (str, optional):
35+
Path to an IRSim YAML world configuration. Defaults to
36+
"multi_robot_world.yaml".
37+
disable_plotting (bool, optional):
38+
If True, disables all IRSim plotting and display windows.
39+
Useful for headless training. Defaults to False.
40+
reward_phase (int, optional):
41+
Selects the reward function variant used by `get_reward`.
42+
Supported values: {1, 2}. Defaults to 1.
3143
"""
3244
display = False if disable_plotting else True
3345
self.env = irsim.make(
@@ -38,29 +50,58 @@ def __init__(self, world_file="multi_robot_world.yaml", disable_plotting=False):
3850
self.num_robots = len(self.env.robot_list)
3951
self.x_range = self.env._world.x_range
4052
self.y_range = self.env._world.y_range
53+
self.reward_phase = reward_phase
4154

4255
def step(self, action, connection, combined_weights=None):
4356
"""
4457
Perform a simulation step for all robots using the provided actions and connections.
4558
4659
Args:
47-
action (list): List of actions for each robot [[lin_vel, ang_vel], ...].
48-
connection (Tensor): Tensor of shape (num_robots, num_robots-1) containing logits indicating connections between robots.
49-
combined_weights (Tensor or None, optional): Optional weights for each connection, shape (num_robots, num_robots-1).
60+
action (list):
61+
A list of length `num_robots`. Each element is
62+
`[linear_velocity, angular_velocity]` to be applied to the
63+
corresponding robot for this step. Units follow IRSim's
64+
kinematics setup (typically m/s and rad/s).
65+
connection (Tensor):
66+
A torch.Tensor of shape `(num_robots, num_robots-1)` containing
67+
*logits* that indicate pairwise connections per robot *without*
68+
the self-connection column. This is not used for logic here,
69+
but preserved for compatibility (see commented code); you may
70+
use it upstream to form `combined_weights`.
71+
combined_weights (Tensor or None, optional):
72+
A torch.Tensor of shape `(num_robots, num_robots)` **or**
73+
`(num_robots, num_robots-1)` that encodes visualization weights
74+
for robot-to-robot edges. When provided, edges from robot *i* to
75+
*j* are drawn with line width `weight * 2`. If you pass the
76+
`(num_robots, num_robots-1)` form, ensure indexing aligns with
77+
how you constructed it (self-column typically omitted).
5078
5179
Returns:
52-
tuple: (
53-
poses (list): List of [x, y, theta] for each robot,
54-
distances (list): Distance to goal for each robot,
55-
coss (list): Cosine of angle to goal for each robot,
56-
sins (list): Sine of angle to goal for each robot,
57-
collisions (list): Collision status for each robot,
58-
goals (list): Goal reached status for each robot,
59-
action (list): Actions applied,
60-
rewards (list): Rewards computed,
61-
positions (list): Current [x, y] for each robot,
62-
goal_positions (list): Goal [x, y] for each robot,
63-
)
80+
tuple:
81+
(
82+
poses (list[list[float]]):
83+
`[x, y, theta]` for each robot after the step.
84+
distances (list[float]):
85+
Euclidean distance from each robot to its goal.
86+
coss (list[float]):
87+
Cosine of the angle between robot heading and goal vector.
88+
sins (list[float]):
89+
Sine of the angle between robot heading and goal vector.
90+
collisions (list[bool]):
91+
Collision flags for each robot, as reported by IRSim.
92+
goals (list[bool]):
93+
Arrival flags for each robot. If True this step, a new
94+
random goal was scheduled for that robot.
95+
action (list):
96+
Echo of the input `action`.
97+
rewards (list[float]):
98+
Per-robot rewards computed by `get_reward(...)`.
99+
positions (list[list[float]]):
100+
`[x, y]` positions for each robot after the step.
101+
goal_positions (list[list[float]]):
102+
`[x, y]` goal coordinates for each robot after any
103+
arrival updates this step.
104+
)
64105
"""
65106
self.env.step(action_id=[i for i in range(self.num_robots)], action=action)
66107
self.env.render()
@@ -103,7 +144,7 @@ def step(self, action, connection, combined_weights=None):
103144
collision = self.env.robot_list[i].collision
104145
action_i = action[i]
105146
reward = self.get_reward(
106-
goal, collision, action_i, closest_robots, distance
147+
goal, collision, action_i, closest_robots, distance, self.reward_phase
107148
)
108149

109150
position = [robot_state[0].item(), robot_state[1].item()]
@@ -121,23 +162,14 @@ def step(self, action, connection, combined_weights=None):
121162
)
122163
goal_positions.append(goal_position)
123164

124-
i_probs = torch.sigmoid(
125-
connection[i]
126-
) # connection[i] is logits for "connect" per pair
127-
128-
# Now we need to insert the self-connection (optional, typically skipped)
129-
i_connections = i_probs.tolist()
130-
i_connections.insert(i, 0)
131165
if combined_weights is not None:
132166
i_weights = combined_weights[i].tolist()
133-
i_weights.insert(i, 0)
134167

135168
for j in range(self.num_robots):
136-
if i_connections[j] > 0.5:
137-
if combined_weights is not None:
138-
weight = i_weights[j]
139-
else:
140-
weight = 1
169+
if combined_weights is not None:
170+
weight = i_weights[j]
171+
# else:
172+
# weight = 1
141173
other_robot_state = self.env.robot_list[j].state
142174
other_pos = [
143175
other_robot_state[0].item(),
@@ -146,7 +178,7 @@ def step(self, action, connection, combined_weights=None):
146178
rx = [position[0], other_pos[0]]
147179
ry = [position[1], other_pos[1]]
148180
self.env.draw_trajectory(
149-
np.array([rx, ry]), refresh=True, linewidth=weight
181+
np.array([rx, ry]), refresh=True, linewidth=weight * 2
150182
)
151183

152184
if goal:
@@ -183,10 +215,13 @@ def reset(
183215
Reset the simulation environment and optionally set robot and obstacle positions.
184216
185217
Args:
186-
robot_state (list or None, optional): Initial state for robots as [x, y, theta, speed].
187-
robot_goal (list or None, optional): Goal position(s) for the robots.
188-
random_obstacles (bool, optional): If True, randomly position obstacles.
189-
random_obstacle_ids (list or None, optional): IDs of obstacles to randomize.
218+
robot_state (list or None, optional): Initial robot state(s) as [[x], [y], [theta]].
219+
If None, random states are assigned ensuring minimum spacing between robots.
220+
robot_goal (list or None, optional): Fixed goal position(s) for robots. If None, random
221+
goals are generated within the environment boundaries.
222+
random_obstacles (bool, optional): If True, reposition obstacles randomly within bounds.
223+
random_obstacle_ids (list or None, optional): IDs of obstacles to randomize. Defaults to
224+
seven obstacles starting from index equal to the number of robots.
190225
191226
Returns:
192227
tuple: (
@@ -278,18 +313,22 @@ def reset(
278313
@staticmethod
279314
def get_reward(goal, collision, action, closest_robots, distance, phase=1):
280315
"""
281-
Calculate the reward for a robot given the current state and action.
316+
Compute the reward for a single robot based on goal progress, collisions,
317+
control effort, and proximity to other robots.
282318
283319
Args:
284320
goal (bool): Whether the robot reached its goal.
285-
collision (bool): Whether a collision occurred.
286-
action (list): [linear_velocity, angular_velocity] applied.
287-
closest_robots (list): Distances to the closest other robots.
288-
distance (float): Distance to the goal.
289-
phase (int, optional): Reward phase/function selector (default: 1).
321+
collision (bool): Whether the robot collided with an obstacle or another robot.
322+
action (list): [linear_velocity, angular_velocity] applied by the robot.
323+
closest_robots (list): Distances to other robots.
324+
distance (float): Current distance to the goal.
325+
phase (int, optional): Reward configuration (1 or 2). Default is 1.
290326
291327
Returns:
292-
float: Computed reward.
328+
float: Computed scalar reward.
329+
330+
Raises:
331+
ValueError: If an unknown reward phase is specified.
293332
"""
294333

295334
match phase:
@@ -302,7 +341,7 @@ def get_reward(goal, collision, action, closest_robots, distance, phase=1):
302341
r_dist = 1.5 / distance
303342
cl_pen = 0
304343
for rob in closest_robots:
305-
add = 1.5 - rob if rob < 1.5 else 0
344+
add = (1.25 - rob) ** 2 if rob < 1.25 else 0
306345
cl_pen += add
307346

308347
return action[0] - 0.5 * abs(action[1]) - cl_pen + r_dist

0 commit comments

Comments
 (0)