4 files changed: +12 −4 lines changed

BCNNPG model:

@@ -112,8 +112,10 @@ def __init__(
         save_directory=Path("robot_nav/models/BPG/checkpoint"),
         model_name="BCNNPG",
         load_directory=Path("robot_nav/models/BPG/checkpoint"),
+        bound_weight=8
     ):
         # Initialize the Actor network
+        self.bound_weight = bound_weight
         self.device = device
         self.actor = Actor(action_dim).to(self.device)
         self.actor_target = Actor(action_dim).to(self.device)
@@ -223,7 +225,7 @@ def train(

         # Calculate the loss between the current Q value and the target Q value
         loss_target_Q = F.mse_loss(current_Q, target_Q)
-        max_bound_loss = 10 * max_bound_loss_Q
+        max_bound_loss = self.bound_weight * max_bound_loss_Q
         loss = loss_target_Q + max_bound_loss
         # Perform the gradient descent
         self.critic_optimizer.zero_grad()
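For context, a minimal runnable sketch of the single-critic update these hunks touch. Only the loss_target_Q arithmetic and the bound_weight multiplier come from the diff; the q_max bound and the way max_bound_loss_Q is computed are illustrative assumptions, since the diff does not show them:

```python
import torch
import torch.nn.functional as F

def bounded_critic_loss(current_Q, target_Q, bound_weight=8.0,
                        max_reward=1.0, discount=0.99):
    # TD loss between current and target Q, as in the diff
    loss_target_Q = F.mse_loss(current_Q, target_Q)
    # Hypothetical upper bound on the discounted return (assumption)
    q_max = max_reward / (1.0 - discount)
    # Penalize only the portion of the prediction above the bound (assumption)
    max_bound_loss_Q = F.relu(current_Q - q_max).pow(2).mean()
    # The diff replaces the hardcoded 10 with the configurable bound_weight
    return loss_target_Q + bound_weight * max_bound_loss_Q

# Dummy tensors standing in for critic outputs
print(bounded_critic_loss(torch.randn(32, 1), torch.randn(32, 1)))
```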
BCNNTD3 model:

@@ -128,8 +128,10 @@ def __init__(
         save_directory=Path("robot_nav/models/BPG/checkpoint"),
         model_name="BCNNTD3",
         load_directory=Path("robot_nav/models/BPG/checkpoint"),
+        bound_weight=8
     ):
         # Initialize the Actor network
+        self.bound_weight = bound_weight
         self.device = device
         self.actor = Actor(action_dim).to(self.device)
         self.actor_target = Actor(action_dim).to(self.device)
@@ -244,7 +246,7 @@ def train(
         loss_target_Q = F.mse_loss(current_Q1, target_Q) + F.mse_loss(
             current_Q2, target_Q
         )
-        max_bound_loss = 10 * (max_bound_loss_Q1 + max_bound_loss_Q2)
+        max_bound_loss = self.bound_weight * (max_bound_loss_Q1 + max_bound_loss_Q2)
         loss = loss_target_Q + max_bound_loss
         # Perform the gradient descent
         self.critic_optimizer.zero_grad()
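Note that the new default (8) differs from the previously hardcoded 10, so untouched call sites now apply a smaller bound penalty. A hypothetical caller that wants the old multiplier back can pass it explicitly; the import path and the exact constructor signature below are assumptions, since the diff shows neither:

```python
from pathlib import Path

# Hypothetical import; the diff does not show the module layout or the
# full list of constructor arguments.
from robot_nav.models.BPG.BCNNTD3 import BCNNTD3

model = BCNNTD3(
    action_dim=2,          # assumed; matches Actor(action_dim) in the hunk
    device="cuda",
    save_directory=Path("robot_nav/models/BPG/checkpoint"),
    model_name="BCNNTD3",
    load_directory=Path("robot_nav/models/BPG/checkpoint"),
    bound_weight=10,       # restore the previously hardcoded multiplier
)
```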
BPG model:

@@ -65,8 +65,10 @@ def __init__(
         save_directory=Path("robot_nav/models/BPG/checkpoint"),
         model_name="BPG",
         load_directory=Path("robot_nav/models/BPG/checkpoint"),
+        bound_weight=8
     ):
         # Initialize the Actor network
+        self.bound_weight = bound_weight
         self.device = device
         self.actor = Actor(state_dim, action_dim).to(self.device)
         self.actor_target = Actor(state_dim, action_dim).to(self.device)
@@ -175,7 +177,7 @@ def train(
         # Calculate the loss between the current Q value and the target Q value
         loss_target_Q = F.mse_loss(current_Q, target_Q)

-        max_bound_loss = 10 * max_bound_loss
+        max_bound_loss = self.bound_weight * max_bound_loss
         loss = loss_target_Q + max_bound_loss
         # Perform the gradient descent
         self.critic_optimizer.zero_grad()
BTD3 model:

@@ -82,8 +82,10 @@ def __init__(
         save_directory=Path("robot_nav/models/BPG/checkpoint"),
         model_name="BTD3",
         load_directory=Path("robot_nav/models/BPG/checkpoint"),
+        bound_weight=8
     ):
         # Initialize the Actor network
+        self.bound_weight = bound_weight
         self.device = device
         self.actor = Actor(state_dim, action_dim).to(self.device)
         self.actor_target = Actor(state_dim, action_dim).to(self.device)
@@ -197,7 +199,7 @@ def train(
         loss_target_Q = F.mse_loss(current_Q1, target_Q) + F.mse_loss(
             current_Q2, target_Q
         )
-        max_bound_loss = 10 * (max_bound_loss_Q1 + max_bound_loss_Q2)
+        max_bound_loss = self.bound_weight * (max_bound_loss_Q1 + max_bound_loss_Q2)
         loss = loss_target_Q + max_bound_loss
         # Perform the gradient descent
         self.critic_optimizer.zero_grad()
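The twin-critic variants (BCNNTD3, BTD3) share a single weight across both bound penalties. A self-contained sketch of that combined loss, with dummy tensors and an assumed ReLU-style penalty, since the diff does not show how max_bound_loss_Q1/Q2 are produced:

```python
import torch
import torch.nn.functional as F

def twin_critic_loss(current_Q1, current_Q2, target_Q,
                     max_bound_loss_Q1, max_bound_loss_Q2, bound_weight=8.0):
    # Sum of both critics' TD errors, as in the BCNNTD3/BTD3 hunks above
    loss_target_Q = F.mse_loss(current_Q1, target_Q) + F.mse_loss(
        current_Q2, target_Q
    )
    # One shared, configurable weight on both bound penalties (was 10)
    return loss_target_Q + bound_weight * (max_bound_loss_Q1 + max_bound_loss_Q2)

# Dummy example: penalties assumed to be squared excess above a bound of 5.0
q1, q2, tq = torch.randn(32, 1), torch.randn(32, 1), torch.randn(32, 1)
b1 = F.relu(q1 - 5.0).pow(2).mean()
b2 = F.relu(q2 - 5.0).pow(2).mean()
print(twin_critic_loss(q1, q2, tq, b1, b2, bound_weight=8.0))
```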