We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 6d8c494, commit c5226f6 (Copy full SHA for c5226f6)
ml-agents/mlagents/trainers/ppo/trainer.py
@@ -422,6 +422,7 @@ def update_policy(self):
422
number_experiences=len(self.training_buffer.update_buffer["actions"]),
423
mean_return=float(np.mean(self.cumulative_returns_since_policy_update)),
424
)
425
+ self.cumulative_returns_since_policy_update = []
426
n_sequences = max(
427
int(self.trainer_parameters["batch_size"] / self.policy.sequence_length), 1
428
0 commit comments