
Commit fe977a2

Merge pull request #12 from reiniscimurs/feature/multi_robot_gnn
Multi Robot Gnn
2 parents bf89430 + b98444e commit fe977a2

24 files changed: +1,549 −230 lines

poetry.lock

Lines changed: 163 additions & 157 deletions
Some generated files are not rendered by default.

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -16,7 +16,7 @@ dependencies = [
     "mkdocs (>=1.6.1,<2.0.0)",
     "mkdocstrings[python] (>=0.29.1,<0.30.0)",
     "mkdocs-material (>=9.6.11,<10.0.0)",
-    "scipy (>=1.15.2,<2.0.0)"
+    "scipy (>=1.15.2,<2.0.0)",
 ]

 [tool.pytest.ini_options]

robot_nav/SIM_ENV/__init__.py

Whitespace-only changes.

robot_nav/SIM_ENV/marl_sim.py

Lines changed: 300 additions & 0 deletions
@@ -0,0 +1,300 @@
import irsim
import numpy as np
import random
import torch

from robot_nav.SIM_ENV.sim_env import SIM_ENV


class MARL_SIM(SIM_ENV):
    """
    A simulation environment interface for robot navigation using IRSim in a MARL setting.

    This class wraps around the IRSim environment and provides methods for stepping,
    resetting, and interacting with mobile robots, including reward computation.

    Attributes:
        env (object): The simulation environment instance from IRSim.
        robot_goal (np.ndarray): The goal position of the robot.
    """

    def __init__(self, world_file="multi_robot_world.yaml", disable_plotting=False):
        """
        Initialize the simulation environment.

        Args:
            world_file (str): Path to the world configuration YAML file.
            disable_plotting (bool): If True, disables rendering and plotting.
        """
        display = False if disable_plotting else True
        self.env = irsim.make(
            world_file, disable_all_plot=disable_plotting, display=display
        )
        robot_info = self.env.get_robot_info(0)
        self.robot_goal = robot_info.goal
        self.num_robots = len(self.env.robot_list)
        self.x_range = self.env._world.x_range
        self.y_range = self.env._world.y_range

    def step(self, action, connection, combined_weights=None):
        """
        Perform one step in the simulation using the given control commands.

        Args:
            action (list): Per-robot control commands [linear velocity, angular velocity].
            connection (torch.Tensor): Per-robot connection logits towards every other robot.
            combined_weights (torch.Tensor or None): Optional per-connection weights used as
                line widths when visualizing active connections.

        Returns:
            (tuple): Per-robot poses, distances to goal, cosine and sine of angle to goal,
                     collision flags, goal reached flags, applied actions, rewards, positions,
                     and goal positions.
        """
        self.env.step(action_id=[i for i in range(self.num_robots)], action=action)
        self.env.render()

        poses = []
        distances = []
        coss = []
        sins = []
        collisions = []
        goals = []
        rewards = []
        positions = []
        goal_positions = []
        robot_states = [
            [self.env.robot_list[i].state[0], self.env.robot_list[i].state[1]]
            for i in range(self.num_robots)
        ]
        for i in range(self.num_robots):

            robot_state = self.env.robot_list[i].state
            closest_robots = [
                np.linalg.norm(
                    [
                        robot_states[j][0] - robot_state[0],
                        robot_states[j][1] - robot_state[1],
                    ]
                )
                for j in range(self.num_robots)
                if j != i
            ]
            robot_goal = self.env.robot_list[i].goal
            goal_vector = [
                robot_goal[0].item() - robot_state[0].item(),
                robot_goal[1].item() - robot_state[1].item(),
            ]
            distance = np.linalg.norm(goal_vector)
            goal = self.env.robot_list[i].arrive
            pose_vector = [np.cos(robot_state[2]).item(), np.sin(robot_state[2]).item()]
            cos, sin = self.cossin(pose_vector, goal_vector)
            collision = self.env.robot_list[i].collision
            action_i = action[i]
            reward = self.get_reward(
                goal, collision, action_i, closest_robots, distance
            )

            position = [robot_state[0].item(), robot_state[1].item()]
            goal_position = [robot_goal[0].item(), robot_goal[1].item()]

            distances.append(distance)
            coss.append(cos)
            sins.append(sin)
            collisions.append(collision)
            goals.append(goal)
            rewards.append(reward)
            positions.append(position)
            poses.append(
                [robot_state[0].item(), robot_state[1].item(), robot_state[2].item()]
            )
            goal_positions.append(goal_position)

            i_probs = torch.sigmoid(
                connection[i]
            )  # connection[i] is logits for "connect" per pair

            # Now we need to insert the self-connection (optional, typically skipped)
            i_connections = i_probs.tolist()
            i_connections.insert(i, 0)
            if combined_weights is not None:
                i_weights = combined_weights[i].tolist()
                i_weights.insert(i, 0)

            for j in range(self.num_robots):
                if i_connections[j] > 0.5:
                    if combined_weights is not None:
                        weight = i_weights[j]
                    else:
                        weight = 1
                    other_robot_state = self.env.robot_list[j].state
                    other_pos = [
                        other_robot_state[0].item(),
                        other_robot_state[1].item(),
                    ]
                    rx = [position[0], other_pos[0]]
                    ry = [position[1], other_pos[1]]
                    self.env.draw_trajectory(
                        np.array([rx, ry]), refresh=True, linewidth=weight
                    )

            if goal:
                self.env.robot_list[i].set_random_goal(
                    obstacle_list=self.env.obstacle_list,
                    init=True,
                    range_limits=[
                        [self.x_range[0] + 1, self.y_range[0] + 1, -3.141592653589793],
                        [self.x_range[1] - 1, self.y_range[1] - 1, 3.141592653589793],
                    ],
                )

        return (
            poses,
            distances,
            coss,
            sins,
            collisions,
            goals,
            action,
            rewards,
            positions,
            goal_positions,
        )

    def reset(
        self,
        robot_state=None,
        robot_goal=None,
        random_obstacles=False,
        random_obstacle_ids=None,
    ):
        """
        Reset the simulation environment, optionally setting robot and obstacle states.

        Args:
            robot_state (list or None): Initial state of the robot as a list of [x, y, theta, speed].
            robot_goal (list or None): Goal state for the robot.
            random_obstacles (bool): Whether to randomly reposition obstacles.
            random_obstacle_ids (list or None): Specific obstacle IDs to randomize.

        Returns:
            (tuple): Initial observation after reset, in the same per-robot form as step():
                     poses, distances, cos/sin, collision and goal flags, actions, rewards,
                     positions, and goal positions.
        """
        if robot_state is None:
            robot_state = [[random.uniform(3, 9)], [random.uniform(3, 9)], [0]]

        init_states = []
        for robot in self.env.robot_list:
            conflict = True
            while conflict:
                conflict = False
                robot_state = [
                    [random.uniform(3, 9)],
                    [random.uniform(3, 9)],
                    [random.uniform(-3.14, 3.14)],
                ]
                pos = [robot_state[0][0], robot_state[1][0]]
                for loc in init_states:
                    vector = [
                        pos[0] - loc[0],
                        pos[1] - loc[1],
                    ]
                    if np.linalg.norm(vector) < 0.6:
                        conflict = True
            init_states.append(pos)

            robot.set_state(
                state=np.array(robot_state),
                init=True,
            )

        if random_obstacles:
            if random_obstacle_ids is None:
                random_obstacle_ids = [i + self.num_robots for i in range(7)]
            self.env.random_obstacle_position(
                range_low=[self.x_range[0], self.y_range[0], -3.14],
                range_high=[self.x_range[1], self.y_range[1], 3.14],
                ids=random_obstacle_ids,
                non_overlapping=True,
            )

        for robot in self.env.robot_list:
            if robot_goal is None:
                robot.set_random_goal(
                    obstacle_list=self.env.obstacle_list,
                    init=True,
                    range_limits=[
                        [self.x_range[0] + 1, self.y_range[0] + 1, -3.141592653589793],
                        [self.x_range[1] - 1, self.y_range[1] - 1, 3.141592653589793],
                    ],
                )
            else:
                self.env.robot.set_goal(np.array(robot_goal), init=True)
        self.env.reset()
        self.robot_goal = self.env.robot.goal

        action = [[0.0, 0.0] for _ in range(self.num_robots)]
        con = torch.tensor(
            [[0.0 for _ in range(self.num_robots - 1)] for _ in range(self.num_robots)]
        )
        poses, distance, cos, sin, _, _, action, reward, positions, goal_positions = (
            self.step(action, con)
        )
        return (
            poses,
            distance,
            cos,
            sin,
            [False] * self.num_robots,
            [False] * self.num_robots,
            action,
            reward,
            positions,
            goal_positions,
        )

    @staticmethod
    def get_reward(goal, collision, action, closest_robots, distance, phase=1):
        """
        Calculate the reward for the current step.

        Args:
            goal (bool): Whether the goal has been reached.
            collision (bool): Whether a collision occurred.
            action (list): The action taken [linear velocity, angular velocity].
            closest_robots (list): Distances to the closest robots.
            distance (float): Distance to goal.
            phase (int, optional): Reward function phase. Defaults to 1.

        Returns:
            (float): Computed reward for the current state.
        """

        match phase:
            case 1:
                if goal:
                    return 100.0
                elif collision:
                    return -100.0 * 3 * action[0]
                else:
                    r_dist = 1.5 / distance
                    cl_pen = 0
                    for rob in closest_robots:
                        add = 1.5 - rob if rob < 1.5 else 0
                        cl_pen += add

                    return action[0] - 0.5 * abs(action[1]) - cl_pen + r_dist

            case 2:
                if goal:
                    return 70.0
                elif collision:
                    return -100.0 * 3 * action[0]
                else:
                    cl_pen = 0
                    for rob in closest_robots:
                        add = (3 - rob) ** 2 if rob < 3 else 0
                        cl_pen += add

                    return -0.5 * abs(action[1]) - cl_pen

            case _:
                raise ValueError("Unknown reward phase")

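A minimal driver sketch for the new MARL_SIM environment (not part of the commit): the world file name matches the class default, the action values and step count are arbitrary, and the zero connection logits stand in for the GNN output this branch targets. With zero logits, sigmoid gives exactly 0.5, which does not clear the > 0.5 threshold, so no connection lines are drawn.

import torch

from robot_nav.SIM_ENV.marl_sim import MARL_SIM

# Hypothetical driver loop; assumes multi_robot_world.yaml defines several robots.
sim = MARL_SIM(world_file="multi_robot_world.yaml", disable_plotting=True)

# reset() returns per-robot lists in the same order as step().
(poses, dist, cos, sin, collisions, goals,
 action, reward, positions, goal_positions) = sim.reset()

for _ in range(100):
    # Constant forward motion for every robot; a trained policy would supply this.
    action = [[0.5, 0.0] for _ in range(sim.num_robots)]
    # Placeholder logits: one row per robot, one entry per other robot.
    connection = torch.zeros(sim.num_robots, sim.num_robots - 1)
    (poses, dist, cos, sin, collisions, goals,
     action, reward, positions, goal_positions) = sim.step(action, connection)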
robot_nav/sim.py renamed to robot_nav/SIM_ENV/sim.py

Lines changed: 2 additions & 21 deletions
@@ -2,11 +2,10 @@
 import numpy as np
 import random

-import shapely
-from irsim.lib.handler.geometry_handler import GeometryFactory
+from robot_nav.SIM_ENV.sim_env import SIM_ENV


-class SIM_ENV:
+class SIM(SIM_ENV):
     """
     A simulation environment interface for robot navigation using IRSim.

@@ -121,24 +120,6 @@ def reset(
         )
         return latest_scan, distance, cos, sin, False, False, action, reward

-    @staticmethod
-    def cossin(vec1, vec2):
-        """
-        Compute the cosine and sine of the angle between two 2D vectors.
-
-        Args:
-            vec1 (list): First 2D vector.
-            vec2 (list): Second 2D vector.
-
-        Returns:
-            (tuple): (cosine, sine) of the angle between the vectors.
-        """
-        vec1 = vec1 / np.linalg.norm(vec1)
-        vec2 = vec2 / np.linalg.norm(vec2)
-        cos = np.dot(vec1, vec2)
-        sin = vec1[0] * vec2[1] - vec1[1] * vec2[0]
-        return cos, sin
-
     @staticmethod
     def get_reward(goal, collision, action, laser_scan):
         """

robot_nav/SIM_ENV/sim_env.py

Lines changed: 35 additions & 0 deletions
@@ -0,0 +1,35 @@
from abc import ABC, abstractmethod
import numpy as np


class SIM_ENV(ABC):
    @abstractmethod
    def step(self):
        raise NotImplementedError("step method must be implemented by subclass.")

    @abstractmethod
    def reset(self):
        raise NotImplementedError("reset method must be implemented by subclass.")

    @staticmethod
    def cossin(vec1, vec2):
        """
        Compute the cosine and sine of the angle between two 2D vectors.

        Args:
            vec1 (list): First 2D vector.
            vec2 (list): Second 2D vector.

        Returns:
            (tuple): (cosine, sine) of the angle between the vectors.
        """
        vec1 = vec1 / np.linalg.norm(vec1)
        vec2 = vec2 / np.linalg.norm(vec2)
        cos = np.dot(vec1, vec2)
        sin = vec1[0] * vec2[1] - vec1[1] * vec2[0]
        return cos, sin

    @staticmethod
    @abstractmethod
    def get_reward():
        raise NotImplementedError("get_reward method must be implemented by subclass.")

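Because cossin() is a plain @staticmethod on the new abstract base, it can be sanity-checked without constructing a simulator. A small illustration (not code from the commit):

import numpy as np

from robot_nav.SIM_ENV.sim_env import SIM_ENV

heading = [1.0, 0.0]   # robot facing along +x
to_goal = [0.0, 2.0]   # goal directly to the left (+y)
cos, sin = SIM_ENV.cossin(heading, to_goal)
print(cos, sin)        # expected: ~0.0 and ~1.0 for a 90-degree angle to the goal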
robot_nav/models/CNNTD3/CNNTD3.py

Lines changed: 2 additions & 2 deletions
@@ -325,8 +325,8 @@ def train(
         state = torch.Tensor(batch_states).to(self.device)
         next_state = torch.Tensor(batch_next_states).to(self.device)
         action = torch.Tensor(batch_actions).to(self.device)
-        reward = torch.Tensor(batch_rewards).to(self.device)
-        done = torch.Tensor(batch_dones).to(self.device)
+        reward = torch.Tensor(batch_rewards).to(self.device).reshape(-1, 1)
+        done = torch.Tensor(batch_dones).to(self.device).reshape(-1, 1)

         # Obtain the estimated action from the next state by using the actor-target
         next_action = self.actor_target(next_state)

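The added .reshape(-1, 1) guards against a PyTorch broadcasting pitfall: if batch_rewards and batch_dones come out of the replay buffer as 1-D arrays of shape [B] while the critic's target Q-values have shape [B, 1], the Bellman target computation silently broadcasts to [B, B]. A toy illustration (the shapes are assumptions about the buffer output, not taken from the training code):

import torch

target_Q = torch.ones(4, 1)   # critic target, shape [B, 1]
reward = torch.ones(4)        # 1-D rewards, shape [B]

wrong = reward + 0.99 * target_Q                  # broadcasts to [B, B] -- silently wrong
right = reward.reshape(-1, 1) + 0.99 * target_Q   # stays [B, 1]

print(wrong.shape, right.shape)  # torch.Size([4, 4]) torch.Size([4, 1])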