
Commit 38bd0b7

Add random testing environment

1 parent 7097123, commit 38bd0b7

8 files changed: +75 -59 lines changed

8 files changed

+75
-59
lines changed

robot_nav/eval_world.yaml
Lines changed: 1 addition & 1 deletion

@@ -3,7 +3,7 @@ world:
   width: 10 # the height of the world
   step_time: 0.3 # Hz calculate each step
   sample_time: 0.3 # Hz for render and data extraction
-  collision_mode: 'react'
+  collision_mode: 'reactive'
 
 robot:
   - kinematics: {name: 'diff'}

robot_nav/models/BPG/BCNNPG.py
Lines changed: 40 additions & 36 deletions

@@ -154,22 +154,23 @@ def act(self, state):
 
     # training cycle
     def train(
-        self,
-        replay_buffer,
-        iterations,
-        batch_size,
-        discount=0.99,
-        tau=0.005,
-        policy_noise=0.2,
-        noise_clip=0.5,
-        policy_freq=2,
-        max_lin_vel=0.5,
-        max_ang_vel=1,
-        goal_reward=100,
-        distance_norm=10,
-        time_step=0.3,
+        self,
+        replay_buffer,
+        iterations,
+        batch_size,
+        discount=0.99,
+        tau=0.005,
+        policy_noise=0.2,
+        noise_clip=0.5,
+        policy_freq=2,
+        max_lin_vel=0.5,
+        max_ang_vel=1,
+        goal_reward=100,
+        distance_norm=10,
+        time_step=0.3,
     ):
         av_Q = 0
+        av_bound = 0
         max_b = 0
         max_Q = -inf
         av_loss = 0
@@ -225,11 +226,10 @@ def train(
                 done,
             )
             max_b = max(max_b, torch.max(max_bound))
-            max_bound_loss_Q = current_Q - max_bound
-            max_bound_loss_Q[max_bound_loss_Q < 0] = 0
-            max_bound_loss_Q = torch.square(max_bound_loss_Q).mean()
-            max_bound_loss = max_bound_loss_Q
+            av_bound += torch.mean(max_bound)
 
+            max_bound_Q = torch.min(current_Q, max_bound)
+            max_bound_loss = F.mse_loss(current_Q, max_bound_Q)
             # Calculate the loss between the current Q value and the target Q value
             loss_target_Q = F.mse_loss(current_Q, target_Q)
 
@@ -244,6 +244,7 @@ def train(
             # Maximize the actor output value by performing gradient descent on negative Q values
             # (essentially perform gradient ascent)
             actor_grad = self.critic(state, self.actor(state))
+            actor_grad = torch.min(actor_grad, max_bound)
            actor_grad = -actor_grad.mean()
            self.actor_optimizer.zero_grad()
            actor_grad.backward()
@@ -252,15 +253,15 @@ def train(
             # Use soft update to update the actor-target network parameters by
             # infusing small amount of current parameters
             for param, target_param in zip(
-                self.actor.parameters(), self.actor_target.parameters()
+                self.actor.parameters(), self.actor_target.parameters()
             ):
                 target_param.data.copy_(
                     tau * param.data + (1 - tau) * target_param.data
                 )
             # Use soft update to update the critic-target network parameters by infusing
             # small amount of current parameters
             for param, target_param in zip(
-                self.critic.parameters(), self.critic_target.parameters()
+                self.critic.parameters(), self.critic_target.parameters()
             ):
                 target_param.data.copy_(
                     tau * param.data + (1 - tau) * target_param.data
@@ -279,22 +280,25 @@ def train(
             "train/av_max_bound_loss", av_max_bound_loss / iterations, self.iter_count
         )
         self.writer.add_scalar("train/avg_Q", av_Q / iterations, self.iter_count)
+        self.writer.add_scalar(
+            "train/avg_bound", av_bound / iterations, self.iter_count
+        )
         self.writer.add_scalar("train/max_b", max_b, self.iter_count)
         self.writer.add_scalar("train/max_Q", max_Q, self.iter_count)
         if self.save_every > 0 and self.iter_count % self.save_every == 0:
             self.save(filename=self.model_name, directory=self.save_directory)
 
     def get_max_bound(
-        self,
-        next_state,
-        discount,
-        max_ang_vel,
-        max_lin_vel,
-        time_step,
-        distance_norm,
-        goal_reward,
-        reward,
-        done,
+        self,
+        next_state,
+        discount,
+        max_ang_vel,
+        max_lin_vel,
+        time_step,
+        distance_norm,
+        goal_reward,
+        reward,
+        done,
     ):
         cos = next_state[:, -4]
         sin = next_state[:, -3]
@@ -304,7 +308,7 @@ def get_max_bound(
         full_turn_steps = torch.floor(turn_steps.abs())
         turn_rew = [
             (
-                -1 * discount ** step * max_ang_vel
+                -1 * discount**step * max_ang_vel
                 if step
                 else torch.zeros(1, device=self.device)
             )
@@ -326,20 +330,20 @@ def get_max_bound(
         final_steps = torch.ceil(distances) + full_turn_steps
         inter_steps = torch.trunc(distances) + full_turn_steps
         final_discount = torch.tensor(
-            [discount ** pw for pw in final_steps], device=self.device
+            [discount**pw for pw in final_steps], device=self.device
         )
         final_rew = (
-            torch.ones_like(distances, device=self.device)
-            * goal_reward
-            * final_discount
+            torch.ones_like(distances, device=self.device)
+            * goal_reward
+            * final_discount
         )
 
         max_inter_steps = inter_steps.max()
         exponents = torch.arange(
             1, max_inter_steps + 1, dtype=torch.float32, device=self.device
         )
         discount_exponents = torch.tensor(
-            [discount ** e for e in exponents], device=self.device
+            [discount**e for e in exponents], device=self.device
         )
         inter_rew = torch.tensor(
             [

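Note on the change above: the same edit recurs in all four model files, so it is summarised once here. The snippet below is a minimal, self-contained sketch of the new bounded critic loss and the bounded actor objective used in the single-critic trainers (BPG, BCNNPG); the random tensors stand in for the repository's critic outputs and get_max_bound() results and are illustrative only.

import torch
import torch.nn.functional as F

# Stand-ins for the critic's Q estimates and the analytic upper bound on the
# discounted return (self.get_max_bound(...) in the repository).
current_Q = torch.randn(64, 1)
max_bound = torch.rand(64, 1) * 10.0

# Removed formulation: a one-sided hinge that penalises Q only where it
# exceeds the bound.
hinge = torch.clamp(current_Q - max_bound, min=0.0)
old_max_bound_loss = torch.square(hinge).mean()

# New formulation from this commit: MSE between Q and min(Q, bound).
# Wherever Q is below the bound the target equals Q and contributes nothing,
# so only bound violations are penalised, now expressed through F.mse_loss.
max_bound_Q = torch.min(current_Q, max_bound)
new_max_bound_loss = F.mse_loss(current_Q, max_bound_Q)

# The two formulations produce the same value.
assert torch.allclose(old_max_bound_loss, new_max_bound_loss)

# Actor objective: the commit additionally clips the critic's evaluation of the
# actor's action at the bound before averaging, so the policy gains nothing
# from Q estimates above the analytic maximum.
actor_Q = torch.randn(64, 1)  # stand-in for self.critic(state, self.actor(state))
actor_objective = -torch.min(actor_Q, max_bound).mean()

The commit also accumulates av_bound and logs it as "train/avg_bound", which makes it easy to compare the average learned Q value against the average analytic bound during training.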
robot_nav/models/BPG/BCNNTD3.py
Lines changed: 10 additions & 6 deletions

@@ -186,6 +186,7 @@ def train(
         time_step=0.3,
     ):
         av_Q = 0
+        av_bound = 0
         max_b = 0
         max_Q = -inf
         av_loss = 0
@@ -242,12 +243,11 @@ def train(
                 reward,
             )
             max_b += max(max_b, torch.max(max_bound))
-            max_bound_loss_Q1 = current_Q1 - max_bound
-            max_bound_loss_Q2 = current_Q2 - max_bound
-            max_bound_loss_Q1[max_bound_loss_Q1 < 0] = 0
-            max_bound_loss_Q2[max_bound_loss_Q2 < 0] = 0
-            max_bound_loss_Q1 = torch.square(max_bound_loss_Q1).mean()
-            max_bound_loss_Q2 = torch.square(max_bound_loss_Q1).mean()
+            av_bound += torch.mean(max_bound)
+            max_bound_Q1 = torch.min(current_Q1, max_bound)
+            max_bound_loss_Q1 = F.mse_loss(current_Q1, max_bound_Q1)
+            max_bound_Q2 = torch.min(current_Q2, max_bound)
+            max_bound_loss_Q2 = F.mse_loss(current_Q2, max_bound_Q2)
 
             # Calculate the loss between the current Q value and the target Q value
             loss_target_Q = F.mse_loss(current_Q1, target_Q) + F.mse_loss(
@@ -264,6 +264,7 @@ def train(
             # Maximize the actor output value by performing gradient descent on negative Q values
             # (essentially perform gradient ascent)
             actor_grad, _ = self.critic(state, self.actor(state))
+            actor_grad = torch.min(actor_grad, max_bound)
             actor_grad = -actor_grad.mean()
             self.actor_optimizer.zero_grad()
             actor_grad.backward()
@@ -299,6 +300,9 @@ def train(
             "train/av_max_bound_loss", av_max_bound_loss / iterations, self.iter_count
         )
         self.writer.add_scalar("train/avg_Q", av_Q / iterations, self.iter_count)
+        self.writer.add_scalar(
+            "train/avg_bound", av_bound / iterations, self.iter_count
+        )
         self.writer.add_scalar("train/max_b", max_b, self.iter_count)
         self.writer.add_scalar("train/max_Q", max_Q, self.iter_count)
         if self.save_every > 0 and self.iter_count % self.save_every == 0:

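The TD3-based trainers (BCNNTD3, BTD3) apply the same bound to both critic heads. A sketch of that twin-critic variant, again with placeholder tensors instead of the real networks and with the two penalties kept as separate terms as in the diff:

import torch
import torch.nn.functional as F

# Placeholder outputs of the two critic heads and the analytic return bound.
current_Q1 = torch.randn(64, 1)
current_Q2 = torch.randn(64, 1)
max_bound = torch.rand(64, 1) * 10.0

# Each head is penalised only where it exceeds the bound, mirroring the new code.
max_bound_loss_Q1 = F.mse_loss(current_Q1, torch.min(current_Q1, max_bound))
max_bound_loss_Q2 = F.mse_loss(current_Q2, torch.min(current_Q2, max_bound))

# The actor objective uses the first head, clipped at the bound as in the diff.
actor_Q1 = torch.randn(64, 1)  # stand-in for self.critic(state, self.actor(state))[0]
actor_objective = -torch.min(actor_Q1, max_bound).mean()

As a side effect, the rewrite also removes a copy-paste slip in the deleted lines, where max_bound_loss_Q2 was computed from max_bound_loss_Q1 (torch.square(max_bound_loss_Q1).mean()), so the second head's bound violation never actually entered the loss.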
robot_nav/models/BPG/BPG.py
Lines changed: 8 additions & 4 deletions

@@ -123,6 +123,7 @@ def train(
         time_step=0.3,
     ):
         av_Q = 0
+        av_bound = 0
         max_b = 0
         max_Q = -inf
         av_loss = 0
@@ -178,11 +179,10 @@ def train(
                 done,
             )
             max_b = max(max_b, torch.max(max_bound))
-            max_bound_loss_Q = current_Q - max_bound
-            max_bound_loss_Q[max_bound_loss_Q < 0] = 0
-            max_bound_loss_Q = torch.square(max_bound_loss_Q).mean()
-            max_bound_loss = max_bound_loss_Q
+            av_bound += torch.mean(max_bound)
 
+            max_bound_Q = torch.min(current_Q, max_bound)
+            max_bound_loss = F.mse_loss(current_Q, max_bound_Q)
             # Calculate the loss between the current Q value and the target Q value
             loss_target_Q = F.mse_loss(current_Q, target_Q)
 
@@ -197,6 +197,7 @@ def train(
             # Maximize the actor output value by performing gradient descent on negative Q values
             # (essentially perform gradient ascent)
             actor_grad = self.critic(state, self.actor(state))
+            actor_grad = torch.min(actor_grad, max_bound)
             actor_grad = -actor_grad.mean()
             self.actor_optimizer.zero_grad()
             actor_grad.backward()
@@ -232,6 +233,9 @@ def train(
             "train/av_max_bound_loss", av_max_bound_loss / iterations, self.iter_count
         )
         self.writer.add_scalar("train/avg_Q", av_Q / iterations, self.iter_count)
+        self.writer.add_scalar(
+            "train/avg_bound", av_bound / iterations, self.iter_count
+        )
         self.writer.add_scalar("train/max_b", max_b, self.iter_count)
         self.writer.add_scalar("train/max_Q", max_Q, self.iter_count)
         if self.save_every > 0 and self.iter_count % self.save_every == 0:

robot_nav/models/BPG/BTD3.py
Lines changed: 10 additions & 6 deletions

@@ -140,6 +140,7 @@ def train(
         time_step=0.3,
     ):
         av_Q = 0
+        av_bound = 0
         max_b = 0
         max_Q = -inf
         av_loss = 0
@@ -196,12 +197,11 @@ def train(
                 reward,
             )
             max_b += max(max_b, torch.max(max_bound))
-            max_bound_loss_Q1 = current_Q1 - max_bound
-            max_bound_loss_Q2 = current_Q2 - max_bound
-            max_bound_loss_Q1[max_bound_loss_Q1 < 0] = 0
-            max_bound_loss_Q2[max_bound_loss_Q2 < 0] = 0
-            max_bound_loss_Q1 = torch.square(max_bound_loss_Q1).mean()
-            max_bound_loss_Q2 = torch.square(max_bound_loss_Q1).mean()
+            av_bound += torch.mean(max_bound)
+            max_bound_Q1 = torch.min(current_Q1, max_bound)
+            max_bound_loss_Q1 = F.mse_loss(current_Q1, max_bound_Q1)
+            max_bound_Q2 = torch.min(current_Q2, max_bound)
+            max_bound_loss_Q2 = F.mse_loss(current_Q2, max_bound_Q2)
 
             # Calculate the loss between the current Q value and the target Q value
             loss_target_Q = F.mse_loss(current_Q1, target_Q) + F.mse_loss(
@@ -218,6 +218,7 @@ def train(
             # Maximize the actor output value by performing gradient descent on negative Q values
             # (essentially perform gradient ascent)
             actor_grad, _ = self.critic(state, self.actor(state))
+            actor_grad = torch.min(actor_grad, max_bound)
             actor_grad = -actor_grad.mean()
             self.actor_optimizer.zero_grad()
             actor_grad.backward()
@@ -253,6 +254,9 @@ def train(
             "train/av_max_bound_loss", av_max_bound_loss / iterations, self.iter_count
         )
         self.writer.add_scalar("train/avg_Q", av_Q / iterations, self.iter_count)
+        self.writer.add_scalar(
+            "train/avg_bound", av_bound / iterations, self.iter_count
+        )
         self.writer.add_scalar("train/max_b", max_b, self.iter_count)
         self.writer.add_scalar("train/max_Q", max_Q, self.iter_count)
         if self.save_every > 0 and self.iter_count % self.save_every == 0:

robot_nav/robot_world.yaml
Lines changed: 1 addition & 1 deletion

@@ -3,7 +3,7 @@ world:
   width: 10 # the height of the world
   step_time: 0.3 # 10Hz calculate each step
   sample_time: 0.3 # 10 Hz for render and data extraction
-  collision_mode: 'react'
+  collision_mode: 'reactive'
 
 robot:
   - kinematics: {name: 'diff'}

robot_nav/test_random.py
Lines changed: 3 additions & 3 deletions

@@ -19,21 +19,21 @@ def main(args=None):
     """Main testing function"""
     action_dim = 2  # number of actions produced by the model
     max_action = 1  # maximum absolute value of output actions
-    state_dim = 25  # number of input values in the neural network (vector length of state input)
+    state_dim = 185  # number of input values in the neural network (vector length of state input)
     device = torch.device(
         "cuda" if torch.cuda.is_available() else "cpu"
     )  # using cuda if it is available, cpu otherwise
     epoch = 0  # epoch number
     max_steps = 300  # maximum number of steps in single episode
     test_scenarios = 1000
 
-    model = DDPG(
+    model = BPG(
         state_dim=state_dim,
         action_dim=action_dim,
         max_action=max_action,
         device=device,
         load_model=True,
-        model_name="DDPGexp5",
+        model_name="tryBPGw025exp5",
     )  # instantiate a model
 
     sim = SIM_ENV(

robot_nav/train.py
Lines changed: 2 additions & 2 deletions

@@ -39,14 +39,14 @@ def main(args=None):
     )
     save_every = 5  # save the model every n training cycles
 
-    model = BCNNPG(
+    model = BPG(
         state_dim=state_dim,
         action_dim=action_dim,
         max_action=max_action,
         device=device,
         save_every=save_every,
         load_model=False,
-        model_name="BCNNPGw025exp1",
+        model_name="tryBPGw025exp5",
         bound_weight=0.25,
     )  # instantiate a model
 