We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 2f1e020, commit 64bc002. Copy full SHA for 64bc002
trinity/trainer/verl/dp_actor.py
@@ -118,7 +118,7 @@ def update_policy(self, data: DataProto):  # noqa: C901
 # calculate the total number of response tokens in the minibatch
 mini_batch_token_num = torch.sum(
     mini_batch.batch["response_mask"].to(get_device_id())
-).item()  # TODO: double check this calculation
+).item()
 
 self.actor_optimizer.zero_grad()
 
0 commit comments