@@ -186,7 +186,7 @@ def update(batch):
         with timeit("collecting"):
             data = next(c_iter)
 
-        log_info = {}
+        metrics_to_log = {}
         frames_in_batch = data.numel()
         collected_frames += frames_in_batch
         pbar.update(data.numel())
@@ -195,7 +195,7 @@ def update(batch):
         episode_rewards = data["next", "episode_reward"][data["next", "done"]]
         if len(episode_rewards) > 0:
             episode_length = data["next", "step_count"][data["next", "done"]]
-            log_info.update(
+            metrics_to_log.update(
                 {
                     "train/reward": episode_rewards.mean().item(),
                     "train/episode_length": episode_length.sum().item()
@@ -236,8 +236,8 @@ def update(batch):
         # Get training losses
         losses = torch.stack(losses).float().mean()
         for key, value in losses.items():
-            log_info.update({f"train/{key}": value.item()})
-        log_info.update(
+            metrics_to_log.update({f"train/{key}": value.item()})
+        metrics_to_log.update(
             {
                 "train/lr": alpha * cfg.optim.lr,
             }
@@ -253,21 +253,19 @@ def update(batch):
                 test_rewards = eval_model(
                     actor, test_env, num_episodes=cfg.logger.num_test_episodes
                 )
-                log_info.update(
+                metrics_to_log.update(
                     {
                         "test/reward": test_rewards.mean(),
                     }
                 )
                 actor.train()
 
-        log_info.update(timeit.todict(prefix="time"))
-
         if logger:
-            for key, value in log_info.items():
+            metrics_to_log.update(timeit.todict(prefix="time"))
+            metrics_to_log["time/speed"] = pbar.format_dict["rate"]
+            for key, value in metrics_to_log.items():
                 logger.log_scalar(key, value, collected_frames)
 
-        torch.compiler.cudagraph_mark_step_begin()
-
     collector.shutdown()
     if not test_env.is_closed:
         test_env.close()
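
For reference, the logging flow the new code settles on — build a metrics_to_log dict each iteration, fold in the timing table and the progress-bar rate, then emit every entry through logger.log_scalar — can be exercised outside the training script. The sketch below is a minimal standalone illustration, not repository code: SimpleTimer and PrintLogger are hypothetical stand-ins for TorchRL's timeit utility and logger, the collection step is a sleep, and the per-iteration metric values are placeholders.

    import time
    from collections import defaultdict
    from contextlib import contextmanager

    from tqdm import tqdm


    class SimpleTimer:
        """Stand-in for the timeit utility used in the diff: times named
        blocks and exports per-name averages as "<prefix>/<name>" entries."""

        def __init__(self):
            self._totals = defaultdict(float)
            self._counts = defaultdict(int)

        @contextmanager
        def __call__(self, name):
            start = time.perf_counter()
            try:
                yield
            finally:
                self._totals[name] += time.perf_counter() - start
                self._counts[name] += 1

        def todict(self, prefix=""):
            return {
                f"{prefix}/{name}": total / self._counts[name]
                for name, total in self._totals.items()
            }


    class PrintLogger:
        """Stand-in for a logger exposing log_scalar(key, value, step)."""

        def log_scalar(self, key, value, step):
            print(f"step={step:>8} {key}={value:.4f}")


    timer = SimpleTimer()
    logger = PrintLogger()

    total_frames, frames_per_batch = 10_000, 1_000
    collected_frames = 0
    pbar = tqdm(total=total_frames)

    for i in range(total_frames // frames_per_batch):
        with timer("collecting"):
            time.sleep(0.01)  # placeholder for data collection

        metrics_to_log = {}
        collected_frames += frames_per_batch
        pbar.update(frames_per_batch)

        # Placeholder training metric; the real script fills these from the batch.
        metrics_to_log["train/reward"] = float(i)

        if logger:
            metrics_to_log.update(timer.todict(prefix="time"))
            # tqdm's running rate (frames/s) can be None before the first
            # refresh, hence the fallback to 0.0.
            metrics_to_log["time/speed"] = pbar.format_dict["rate"] or 0.0
            for key, value in metrics_to_log.items():
                logger.log_scalar(key, value, collected_frames)

    pbar.close()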