1 file changed: +2 -3 lines changed
Original file line number | Diff line number | Diff line change
@@ -140,7 +140,6 @@ def train_net(self):
140
140
# pi = self.pi(s, softmax_dim=1)
141
141
# pi_a = pi.gather(1,a)
142
142
ratio = torch .exp (log_pi_a - torch .log (prob_a )) # a/b == exp(log(a)-log(b))
143
-
144
143
surr1 = ratio * advantage
145
144
surr2 = torch .clamp (ratio , 1 - eps_clip , 1 + eps_clip ) * advantage
146
145
loss = - torch .min (surr1 , surr2 ) + F .smooth_l1_loss (self .v (s ) , td_target .detach ())
@@ -150,7 +149,8 @@ def train_net(self):
150
149
self .optimizer .step ()
151
150
152
151
def main ():
153
- env = gym .make ('HalfCheetah-v2' )
152
+ # env = gym.make('HalfCheetah-v2')
153
+ env = gym .make ('Ant-v2' )
154
154
state_dim = env .observation_space .shape [0 ]
155
155
action_dim = env .action_space .shape [0 ]
156
156
hidden_dim = 128
@@ -180,7 +180,6 @@ def main():
180
180
181
181
if done :
182
182
break
183
-
184
183
if n_epi % print_interval == 0 and n_epi != 0 :
185
184
print ("# of episode :{}, avg score : {:.1f}" .format (n_epi , score / print_interval ))
186
185
score = 0.0
You can’t perform that action at this time.
0 commit comments