
Commit d290d93

first refactor
1 parent 9a54277 commit d290d93

22 files changed: +1216 -1633 lines changed

robot_nav/SIM_ENV/__init__.py

Whitespace-only changes.

robot_nav/sim2.py renamed to robot_nav/SIM_ENV/marl_sim.py

Lines changed: 80 additions & 79 deletions

@@ -1,11 +1,12 @@
 import irsim
 import numpy as np
 import random
-
 import torch

+from robot_nav.SIM_ENV.sim_env import SIM_ENV
+

-class SIM_ENV:
+class MARL_SIM(SIM_ENV):
     """
     A simulation environment interface for robot navigation using IRSim.

@@ -17,7 +18,7 @@ class SIM_ENV:
         robot_goal (np.ndarray): The goal position of the robot.
     """

-    def __init__(self, world_file="robot_world.yaml", disable_plotting=False):
+    def __init__(self, world_file="multi_robot_world.yaml", disable_plotting=False):
         """
         Initialize the simulation environment.

@@ -33,7 +34,7 @@ def __init__(self, world_file="robot_world.yaml", disable_plotting=False):
         self.robot_goal = robot_info.goal
         self.num_robots = len(self.env.robot_list)

-    def step(self, action, connection, combined_weights = None):
+    def step(self, action, connection, combined_weights=None):
         """
         Perform one step in the simulation using the given control commands.

@@ -58,12 +59,23 @@ def step(self, action, connection, combined_weights = None):
         rewards = []
         positions = []
         goal_positions = []
-        robot_states = [[self.env.robot_list[i].state[0], self.env.robot_list[i].state[1]] for i in range(self.num_robots)]
+        robot_states = [
+            [self.env.robot_list[i].state[0], self.env.robot_list[i].state[1]]
+            for i in range(self.num_robots)
+        ]
         for i in range(self.num_robots):

             robot_state = self.env.robot_list[i].state
-            closest_robots = [np.linalg.norm([robot_states[j][0] - robot_state[0], robot_states[j][1] - robot_state[1]]) for j in
-                              range(self.num_robots) if j != i]
+            closest_robots = [
+                np.linalg.norm(
+                    [
+                        robot_states[j][0] - robot_state[0],
+                        robot_states[j][1] - robot_state[1],
+                    ]
+                )
+                for j in range(self.num_robots)
+                if j != i
+            ]
             robot_goal = self.env.robot_list[i].goal
             goal_vector = [
                 robot_goal[0].item() - robot_state[0].item(),
@@ -75,7 +87,9 @@ def step(self, action, connection, combined_weights = None):
             cos, sin = self.cossin(pose_vector, goal_vector)
             collision = self.env.robot_list[i].collision
             action_i = action[i]
-            reward = self.get_reward(goal, collision, action_i, closest_robots, distance)
+            reward = self.get_reward(
+                goal, collision, action_i, closest_robots, distance
+            )

             position = [robot_state[0].item(), robot_state[1].item()]
             goal_position = [robot_goal[0].item(), robot_goal[1].item()]
@@ -87,14 +101,14 @@ def step(self, action, connection, combined_weights = None):
             goals.append(goal)
             rewards.append(reward)
             positions.append(position)
-            poses.append([robot_state[0].item(), robot_state[1].item(), robot_state[2].item()])
+            poses.append(
+                [robot_state[0].item(), robot_state[1].item(), robot_state[2].item()]
+            )
             goal_positions.append(goal_position)

-            # gumbel_sample = torch.nn.functional.gumbel_softmax(connection[i], tau=0.5, dim=-1)
-            # i_connections = gumbel_sample.tolist()
-            # i_connections.insert(i, 0)
-
-            i_probs = torch.sigmoid(connection[i]) # connection[i] is logits for "connect" per pair
+            i_probs = torch.sigmoid(
+                connection[i]
+            ) # connection[i] is logits for "connect" per pair

             # Now we need to insert the self-connection (optional, typically skipped)
             i_connections = i_probs.tolist()
@@ -103,30 +117,45 @@ def step(self, action, connection, combined_weights = None):
                 i_weights = combined_weights[i].tolist()
                 i_weights.insert(i, 0)

-
-
             for j in range(self.num_robots):
                 if i_connections[j] > 0.5:
                     if combined_weights is not None:
                         weight = i_weights[j]
                     else:
                         weight = 1
                     other_robot_state = self.env.robot_list[j].state
-                    other_pos = [other_robot_state[0].item(), other_robot_state[1].item()]
+                    other_pos = [
+                        other_robot_state[0].item(),
+                        other_robot_state[1].item(),
+                    ]
                     rx = [position[0], other_pos[0]]
                     ry = [position[1], other_pos[1]]
-                    self.env.draw_trajectory(np.array([rx, ry]), refresh=True, linewidth=weight)
+                    self.env.draw_trajectory(
+                        np.array([rx, ry]), refresh=True, linewidth=weight
+                    )

             if goal:
                 self.env.robot_list[i].set_random_goal(
                     obstacle_list=self.env.obstacle_list,
                     init=True,
-                    # range_limits=[[self.env.robot_list[i].position[0].item()-3, self.env.robot_list[i].position[1].item()-3, -3.141592653589793], [self.env.robot_list[i].position[0].item()+3, self.env.robot_list[i].position[1].item()+3, 3.141592653589793]],
-                    range_limits=[[1, 1, -3.141592653589793],
-                                  [11, 11, 3.141592653589793]],
+                    range_limits=[
+                        [1, 1, -3.141592653589793],
+                        [11, 11, 3.141592653589793],
+                    ],
                 )

-        return poses, distances, coss, sins, collisions, goals, action, rewards, positions, goal_positions
+        return (
+            poses,
+            distances,
+            coss,
+            sins,
+            collisions,
+            goals,
+            action,
+            rewards,
+            positions,
+            goal_positions,
+        )

     def reset(
         self,
@@ -156,7 +185,11 @@ def reset(
             conflict = True
             while conflict:
                 conflict = False
-                robot_state = [[random.uniform(3, 9)], [random.uniform(3, 9)], [random.uniform(-3.14, 3.14)]]
+                robot_state = [
+                    [random.uniform(3, 9)],
+                    [random.uniform(3, 9)],
+                    [random.uniform(-3.14, 3.14)],
+                ]
                 pos = [robot_state[0][0], robot_state[1][0]]
                 for loc in init_states:
                     vector = [
@@ -187,37 +220,35 @@ def reset(
                 robot.set_random_goal(
                     obstacle_list=self.env.obstacle_list,
                     init=True,
-                    # range_limits=[[robot.position[0].item()-3, robot.position[1].item()-3, -3.141592653589793], [robot.position[0].item()+3, robot.position[1].item()+3, 3.141592653589793]],
-                    range_limits=[[1, 1, -3.141592653589793],
-                                  [11, 11, 3.141592653589793]],
+                    range_limits=[
+                        [1, 1, -3.141592653589793],
+                        [11, 11, 3.141592653589793],
+                    ],
                 )
         else:
             self.env.robot.set_goal(np.array(robot_goal), init=True)
         self.env.reset()
         self.robot_goal = self.env.robot.goal

         action = [[0.0, 0.0] for _ in range(self.num_robots)]
-        con = torch.tensor([[0. for _ in range(self.num_robots-1)] for _ in range(self.num_robots)])
-        poses, distance, cos, sin, _, _, action, reward, positions, goal_positions = self.step(action, con)
-        return poses, distance, cos, sin, [False]*self.num_robots, [False]*self.num_robots, action, reward, positions, goal_positions
-
-    @staticmethod
-    def cossin(vec1, vec2):
-        """
-        Compute the cosine and sine of the angle between two 2D vectors.
-
-        Args:
-            vec1 (list): First 2D vector.
-            vec2 (list): Second 2D vector.
-
-        Returns:
-            (tuple): (cosine, sine) of the angle between the vectors.
-        """
-        vec1 = vec1 / np.linalg.norm(vec1)
-        vec2 = vec2 / np.linalg.norm(vec2)
-        cos = np.dot(vec1, vec2)
-        sin = vec1[0] * vec2[1] - vec1[1] * vec2[0]
-        return cos, sin
+        con = torch.tensor(
+            [[0.0 for _ in range(self.num_robots - 1)] for _ in range(self.num_robots)]
+        )
+        poses, distance, cos, sin, _, _, action, reward, positions, goal_positions = (
+            self.step(action, con)
+        )
+        return (
+            poses,
+            distance,
+            cos,
+            sin,
+            [False] * self.num_robots,
+            [False] * self.num_robots,
+            action,
+            reward,
+            positions,
+            goal_positions,
+        )

     @staticmethod
     def get_reward(goal, collision, action, closest_robots, distance):
@@ -233,52 +264,23 @@ def get_reward(goal, collision, action, closest_robots, distance):
         Returns:
             (float): Computed reward for the current state.
         """
-        # if goal:
-        # return 60.0
-        # elif collision:
-        # return -100.0
-        # else:
-        # cl_pen = 0
-        # for rob in closest_robots:
-        # add = 1.5 - rob if rob < 1.5 else 0
-        # cl_pen += add
-        # return -cl_pen
-        # r_dist = 1.25/distance
-        # cl_robot = min(closest_robots)
-        # cl_pen = 0 - cl_robot if cl_robot < 0 else 0
-        # return 2*action[0] - abs(action[1]) - cl_pen + r_dist

         # phase1
         if goal:
             return 100.0
         elif collision:
             return -100.0 * 3 * action[0]
         else:
-            r_dist = 1.5/distance
+            r_dist = 1.5 / distance
             cl_pen = 0
             for rob in closest_robots:
                 add = 1.5 - rob if rob < 1.5 else 0
                 cl_pen += add

-            return action[0] - 0.5 * abs(action[1])-cl_pen + r_dist
-
+            return action[0] - 0.5 * abs(action[1]) - cl_pen + r_dist

         # phase2
         # if goal:
-        #     return 100.0
-        # elif collision:
-        #     return -100.0
-        # else:
-        #     r_dist = 1.5/distance
-        #     cl_pen = 0
-        #     for rob in closest_robots:
-        #         add = 1.5 - rob if rob < 1.5 else 0
-        #         cl_pen += add
-        #
-        #     return -0.5*abs(action[1])-cl_pen
-
-        # phase3
-        # if goal:
         # return 70.0
         # elif collision:
         # return -100.0 * 3 * action[0]
@@ -290,4 +292,3 @@ def get_reward(goal, collision, action, closest_robots, distance):
         # cl_pen += add
         #
         # return -0.5 * abs(action[1]) - cl_pen
-

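For orientation, a minimal hypothetical driver sketch for the renamed MARL_SIM environment, inferred only from the signatures visible in this diff (the constructor, step(action, connection, combined_weights=None), and a reset() that returns the same 10-tuple layout as step()); the bare reset() call, the episode length, and the zero-valued actions and connection logits are illustrative assumptions rather than anything shown in the commit:

import torch

from robot_nav.SIM_ENV.marl_sim import MARL_SIM  # module path taken from the rename above

# world_file and num_robots appear in this diff; everything below is illustrative
sim = MARL_SIM(world_file="multi_robot_world.yaml", disable_plotting=True)

# assumes reset() can be called with its defaults; it returns the same tuple layout as step()
poses, distance, cos, sin, collisions, goals, action, reward, positions, goal_positions = sim.reset()

for _ in range(100):
    # one [linear, angular] command per robot; zeros keep every robot in place
    action = [[0.0, 0.0] for _ in range(sim.num_robots)]
    # per-robot logits toward the other num_robots - 1 robots; sigmoid(logit) > 0.5 draws a link
    connection = torch.zeros(sim.num_robots, sim.num_robots - 1)
    (poses, distance, cos, sin, collisions, goals, action,
     reward, positions, goal_positions) = sim.step(action, connection)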