fix(reasoning-fsdp): fix a small bug in fsdp inference (RLinf#775)

qurakchin · web-flow · commit 08330be1d1e3 · 2026-03-06T16:56:27.000+08:00
Signed-off-by: Louis-J <czzcy3832515@gmail.com>
diff --git a/rlinf/data/io_struct.py b/rlinf/data/io_struct.py
@@ -878,6 +878,9 @@ def merge_batches(
         if len(batches) == 1:
             return batches[0]
 
+        assert all(batch.keys() == batches[0].keys() for batch in batches[1:]), (
+            "All batches must have the same keys"
+        )
         for key in batches[0].keys():
             if torch.is_tensor(batches[0][key]):
                 merged_batch[key] = torch.cat([batch[key] for batch in batches], dim=0)
diff --git a/rlinf/workers/actor/fsdp_actor_worker.py b/rlinf/workers/actor/fsdp_actor_worker.py
@@ -426,6 +426,9 @@ def get_dynamic_batch_as_much(
                 last_result_len = result_len
                 result_len = all_reduce_int(len(rollout_results))
 
+        cliped_results = list(rollout_results[result_len:])
+        rollout_results = rollout_results[:result_len]
+
         batches = []
         for rollout_result in rollout_results:
             batch = rollout_result.to_actor_batch(
diff --git a/rlinf/workers/reward/reward_worker.py b/rlinf/workers/reward/reward_worker.py
@@ -92,6 +92,8 @@ def compute_rewards(self, input_channel: Channel, output_channel: Channel):
                             rollout_result
                         )
             rollout_result = self.down_sample_batch(rollout_result)
+            # answer is not needed in training
+            rollout_result.answers = None
             output_channel.put(rollout_result, async_op=True)
 
         assert recv_batch_size == self.total_batch_size_per_dp, (

Original file line number	Diff line number	Diff line change
`@@ -92,6 +92,8 @@ def compute_rewards(self, input_channel: Channel, output_channel: Channel):`
`92`	`92`	`rollout_result`
`93`	`93`	`)`
`94`	`94`	`rollout_result = self.down_sample_batch(rollout_result)`
	`95`	`+ # answer is not needed in training`
	`96`	`+ rollout_result.answers = None`
`95`	`97`	`output_channel.put(rollout_result, async_op=True)`
`96`	`98`
`97`	`99`	`assert recv_batch_size == self.total_batch_size_per_dp, (`