Skip to content

Commit 9c2f5eb

Browse files
committed
update added noise
1 parent 62ded8e commit 9c2f5eb

File tree

3 files changed

+33
-17
lines changed

3 files changed

+33
-17
lines changed

robot_nav/models/CNNTD3/att.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -346,7 +346,9 @@ def get_action(self, obs, add_noise):
346346
"""
347347
action, connection, combined_weights = self.act(obs)
348348
if add_noise:
349-
action = (action + np.random.normal(0, 0.1, size=action.shape)
349+
noise = np.random.normal(0, 0.4, size=action.shape)
350+
noise = [n/4 if i%2 else n for i, n in enumerate(noise)]
351+
action = (action + noise
350352
).clip(-self.max_action, self.max_action)
351353

352354
return action.reshape(-1, 2), connection, combined_weights

robot_nav/multi_train2.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -56,9 +56,9 @@ def main(args=None):
5656
num_robots=sim.num_robots,
5757
device=device,
5858
save_every=save_every,
59-
load_model=False,
60-
model_name="phase1",
61-
load_model_name="phase1"
59+
load_model=True,
60+
model_name="phase3",
61+
load_model_name="phase2"
6262
) # instantiate a model
6363

6464
replay_buffer = get_buffer(

robot_nav/sim2.py

Lines changed: 27 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -249,23 +249,23 @@ def get_reward(goal, collision, action, closest_robots, distance):
249249
# return 2*action[0] - abs(action[1]) - cl_pen + r_dist
250250

251251
# phase1
252-
if goal:
253-
return 100.0
254-
elif collision:
255-
return -100.0
256-
else:
257-
r_dist = 1.5/distance
258-
cl_pen = 0
259-
for rob in closest_robots:
260-
add = 1.5 - rob if rob < 1.5 else 0
261-
cl_pen += add
262-
263-
return action[0] - 0.5 * abs(action[1])-cl_pen + r_dist
252+
# if goal:
253+
# return 100.0
254+
# elif collision:
255+
# return -100.0
256+
# else:
257+
# r_dist = 1.5/distance
258+
# cl_pen = 0
259+
# for rob in closest_robots:
260+
# add = 1.5 - rob if rob < 1.5 else 0
261+
# cl_pen += add
262+
#
263+
# return action[0] - 0.5 * abs(action[1])-cl_pen + r_dist
264264

265265

266266
# phase2
267267
# if goal:
268-
# return 80.0
268+
# return 100.0
269269
# elif collision:
270270
# return -100.0
271271
# else:
@@ -277,3 +277,17 @@ def get_reward(goal, collision, action, closest_robots, distance):
277277
#
278278
# return -0.5*abs(action[1])-cl_pen
279279

280+
# phase3
281+
if goal:
282+
return 100.0
283+
elif collision:
284+
return -100.0 * 3 * action[0]
285+
else:
286+
r_dist = 1.5 / distance
287+
cl_pen = 0
288+
for rob in closest_robots:
289+
add = 1.5 - rob if rob < 1.5 else 0
290+
cl_pen += add
291+
292+
return -0.5 * abs(action[1]) - cl_pen
293+

0 commit comments

Comments (0)