 import irsim
 import numpy as np
 import random
-
 import torch
 
+from robot_nav.SIM_ENV.sim_env import SIM_ENV
+
 
-class SIM_ENV:
+class MARL_SIM(SIM_ENV):
     """
     A simulation environment interface for robot navigation using IRSim.
 
@@ -17,7 +18,7 @@ class SIM_ENV:
         robot_goal (np.ndarray): The goal position of the robot.
     """
 
-    def __init__(self, world_file="robot_world.yaml", disable_plotting=False):
+    def __init__(self, world_file="multi_robot_world.yaml", disable_plotting=False):
         """
         Initialize the simulation environment.
 
@@ -33,7 +34,7 @@ def __init__(self, world_file="robot_world.yaml", disable_plotting=False):
         self.robot_goal = robot_info.goal
         self.num_robots = len(self.env.robot_list)
 
-    def step(self, action, connection, combined_weights = None):
+    def step(self, action, connection, combined_weights=None):
         """
         Perform one step in the simulation using the given control commands.
 
@@ -58,12 +59,23 @@ def step(self, action, connection, combined_weights = None):
         rewards = []
         positions = []
         goal_positions = []
-        robot_states = [[self.env.robot_list[i].state[0], self.env.robot_list[i].state[1]] for i in range(self.num_robots)]
+        robot_states = [
+            [self.env.robot_list[i].state[0], self.env.robot_list[i].state[1]]
+            for i in range(self.num_robots)
+        ]
         for i in range(self.num_robots):
 
             robot_state = self.env.robot_list[i].state
-            closest_robots = [np.linalg.norm([robot_states[j][0] - robot_state[0], robot_states[j][1] - robot_state[1]]) for j in
-                              range(self.num_robots) if j != i]
+            closest_robots = [
+                np.linalg.norm(
+                    [
+                        robot_states[j][0] - robot_state[0],
+                        robot_states[j][1] - robot_state[1],
+                    ]
+                )
+                for j in range(self.num_robots)
+                if j != i
+            ]
             robot_goal = self.env.robot_list[i].goal
             goal_vector = [
                 robot_goal[0].item() - robot_state[0].item(),
@@ -75,7 +87,9 @@ def step(self, action, connection, combined_weights = None):
             cos, sin = self.cossin(pose_vector, goal_vector)
             collision = self.env.robot_list[i].collision
             action_i = action[i]
-            reward = self.get_reward(goal, collision, action_i, closest_robots, distance)
+            reward = self.get_reward(
+                goal, collision, action_i, closest_robots, distance
+            )
 
             position = [robot_state[0].item(), robot_state[1].item()]
             goal_position = [robot_goal[0].item(), robot_goal[1].item()]
@@ -87,14 +101,14 @@ def step(self, action, connection, combined_weights = None):
             goals.append(goal)
             rewards.append(reward)
             positions.append(position)
-            poses.append([robot_state[0].item(), robot_state[1].item(), robot_state[2].item()])
+            poses.append(
+                [robot_state[0].item(), robot_state[1].item(), robot_state[2].item()]
+            )
             goal_positions.append(goal_position)
 
-            # gumbel_sample = torch.nn.functional.gumbel_softmax(connection[i], tau=0.5, dim=-1)
-            # i_connections = gumbel_sample.tolist()
-            # i_connections.insert(i, 0)
-
-            i_probs = torch.sigmoid(connection[i])  # connection[i] is logits for "connect" per pair
+            i_probs = torch.sigmoid(
+                connection[i]
+            )  # connection[i] is logits for "connect" per pair
 
             # Now we need to insert the self-connection (optional, typically skipped)
             i_connections = i_probs.tolist()
@@ -103,30 +117,45 @@ def step(self, action, connection, combined_weights = None):
                 i_weights = combined_weights[i].tolist()
                 i_weights.insert(i, 0)
 
-
-
             for j in range(self.num_robots):
                 if i_connections[j] > 0.5:
                     if combined_weights is not None:
                         weight = i_weights[j]
                     else:
                         weight = 1
                     other_robot_state = self.env.robot_list[j].state
-                    other_pos = [other_robot_state[0].item(), other_robot_state[1].item()]
+                    other_pos = [
+                        other_robot_state[0].item(),
+                        other_robot_state[1].item(),
+                    ]
                     rx = [position[0], other_pos[0]]
                     ry = [position[1], other_pos[1]]
-                    self.env.draw_trajectory(np.array([rx, ry]), refresh=True, linewidth=weight)
+                    self.env.draw_trajectory(
+                        np.array([rx, ry]), refresh=True, linewidth=weight
+                    )
 
             if goal:
                 self.env.robot_list[i].set_random_goal(
                     obstacle_list=self.env.obstacle_list,
                     init=True,
-                    # range_limits=[[self.env.robot_list[i].position[0].item()-3, self.env.robot_list[i].position[1].item()-3, -3.141592653589793], [self.env.robot_list[i].position[0].item()+3, self.env.robot_list[i].position[1].item()+3, 3.141592653589793]],
-                    range_limits=[[1, 1, -3.141592653589793],
-                                  [11, 11, 3.141592653589793]],
+                    range_limits=[
+                        [1, 1, -3.141592653589793],
+                        [11, 11, 3.141592653589793],
+                    ],
                 )
 
-        return poses, distances, coss, sins, collisions, goals, action, rewards, positions, goal_positions
+        return (
+            poses,
+            distances,
+            coss,
+            sins,
+            collisions,
+            goals,
+            action,
+            rewards,
+            positions,
+            goal_positions,
+        )
 
     def reset(
         self,
@@ -156,7 +185,11 @@ def reset(
                 conflict = True
                 while conflict:
                     conflict = False
-                    robot_state = [[random.uniform(3, 9)], [random.uniform(3, 9)], [random.uniform(-3.14, 3.14)]]
+                    robot_state = [
+                        [random.uniform(3, 9)],
+                        [random.uniform(3, 9)],
+                        [random.uniform(-3.14, 3.14)],
+                    ]
                     pos = [robot_state[0][0], robot_state[1][0]]
                     for loc in init_states:
                         vector = [
@@ -187,37 +220,35 @@ def reset(
                 robot.set_random_goal(
                     obstacle_list=self.env.obstacle_list,
                     init=True,
-                    # range_limits=[[robot.position[0].item()-3, robot.position[1].item()-3, -3.141592653589793], [robot.position[0].item()+3, robot.position[1].item()+3, 3.141592653589793]],
-                    range_limits=[[1, 1, -3.141592653589793],
-                                  [11, 11, 3.141592653589793]],
+                    range_limits=[
+                        [1, 1, -3.141592653589793],
+                        [11, 11, 3.141592653589793],
+                    ],
                 )
         else:
             self.env.robot.set_goal(np.array(robot_goal), init=True)
         self.env.reset()
         self.robot_goal = self.env.robot.goal
 
         action = [[0.0, 0.0] for _ in range(self.num_robots)]
-        con = torch.tensor([[0. for _ in range(self.num_robots - 1)] for _ in range(self.num_robots)])
-        poses, distance, cos, sin, _, _, action, reward, positions, goal_positions = self.step(action, con)
-        return poses, distance, cos, sin, [False]*self.num_robots, [False]*self.num_robots, action, reward, positions, goal_positions
-
-    @staticmethod
-    def cossin(vec1, vec2):
-        """
-        Compute the cosine and sine of the angle between two 2D vectors.
-
-        Args:
-            vec1 (list): First 2D vector.
-            vec2 (list): Second 2D vector.
-
-        Returns:
-            (tuple): (cosine, sine) of the angle between the vectors.
-        """
-        vec1 = vec1 / np.linalg.norm(vec1)
-        vec2 = vec2 / np.linalg.norm(vec2)
-        cos = np.dot(vec1, vec2)
-        sin = vec1[0] * vec2[1] - vec1[1] * vec2[0]
-        return cos, sin
+        con = torch.tensor(
+            [[0.0 for _ in range(self.num_robots - 1)] for _ in range(self.num_robots)]
+        )
+        poses, distance, cos, sin, _, _, action, reward, positions, goal_positions = (
+            self.step(action, con)
+        )
+        return (
+            poses,
+            distance,
+            cos,
+            sin,
+            [False] * self.num_robots,
+            [False] * self.num_robots,
+            action,
+            reward,
+            positions,
+            goal_positions,
+        )
 
     @staticmethod
     def get_reward(goal, collision, action, closest_robots, distance):
@@ -233,52 +264,23 @@ def get_reward(goal, collision, action, closest_robots, distance):
         Returns:
             (float): Computed reward for the current state.
         """
-        # if goal:
-        # return 60.0
-        # elif collision:
-        # return -100.0
-        # else:
-        # cl_pen = 0
-        # for rob in closest_robots:
-        # add = 1.5 - rob if rob < 1.5 else 0
-        # cl_pen += add
-        # return -cl_pen
-        # r_dist = 1.25/distance
-        # cl_robot = min(closest_robots)
-        # cl_pen = 0 - cl_robot if cl_robot < 0 else 0
-        # return 2*action[0] - abs(action[1]) - cl_pen + r_dist
 
         # phase1
         if goal:
             return 100.0
         elif collision:
             return -100.0 * 3 * action[0]
         else:
-            r_dist = 1.5/distance
+            r_dist = 1.5 / distance
             cl_pen = 0
             for rob in closest_robots:
                 add = 1.5 - rob if rob < 1.5 else 0
                 cl_pen += add
 
-            return action[0] - 0.5 * abs(action[1])- cl_pen + r_dist
-
+            return action[0] - 0.5 * abs(action[1]) - cl_pen + r_dist
 
         # phase2
         # if goal:
-        # return 100.0
-        # elif collision:
-        # return -100.0
-        # else:
-        # r_dist = 1.5/distance
-        # cl_pen = 0
-        # for rob in closest_robots:
-        # add = 1.5 - rob if rob < 1.5 else 0
-        # cl_pen += add
-        #
-        # return -0.5*abs(action[1])-cl_pen
-
-        # phase3
-        # if goal:
         # return 70.0
         # elif collision:
         # return -100.0 * 3 * action[0]
@@ -290,4 +292,3 @@ def get_reward(goal, collision, action, closest_robots, distance):
         # cl_pen += add
         #
         # return -0.5 * abs(action[1]) - cl_pen
-
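For orientation (not part of the commit), here is a minimal sketch of how the new MARL_SIM environment might be driven. The module path robot_nav.SIM_ENV.marl_sim, the episode length, and the constant placeholder action and connection logits are assumptions; the class name, constructor arguments, step/reset signatures, and 10-tuple return values follow the diff above.

import torch

# Assumed location of the MARL_SIM class shown in this diff.
from robot_nav.SIM_ENV.marl_sim import MARL_SIM

env = MARL_SIM(world_file="multi_robot_world.yaml", disable_plotting=False)

# reset() returns the same 10-tuple as step(), with goal/collision flags forced to False.
(poses, distances, coss, sins, collisions, goals,
 actions, rewards, positions, goal_positions) = env.reset()

for _ in range(100):  # assumed episode length
    # Placeholder policy: every robot gets the same constant [linear, angular] command.
    action = [[0.5, 0.0] for _ in range(env.num_robots)]
    # Placeholder connection logits: one row per robot, one logit per other robot.
    # sigmoid(0.0) = 0.5, so no inter-robot link is drawn for these values.
    connection = torch.zeros(env.num_robots, env.num_robots - 1)
    (poses, distances, coss, sins, collisions, goals,
     action, rewards, positions, goal_positions) = env.step(action, connection)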
0 commit comments