Remove stale TODO comment from the mini-batch token count in update_policy

yanxi-chen · yanxi-chen · commit 64bc0027943c · 2025-10-30T15:06:20.000+08:00
diff --git a/trinity/trainer/verl/dp_actor.py b/trinity/trainer/verl/dp_actor.py
@@ -118,7 +118,7 @@ def update_policy(self, data: DataProto):  # noqa: C901
                     # calculate the total number of response tokens in the minibatch
                     mini_batch_token_num = torch.sum(
                         mini_batch.batch["response_mask"].to(get_device_id())
-                    ).item()  # TODO: double check this calculation
+                    ).item()
 
                 self.actor_optimizer.zero_grad()