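回退到batch=1 (revert data.train_batch_size and actor_rollout_ref.rollout.n from 4 to 1; raise the rollout sample cap from 5 to 40; skip on_rollout_end/post_rollout_async when a rollout has empty triplets and raise once MAX_TRY reaches 0; raise on empty transitions in get_train_data_batch)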

root · root · commit 1e6e960d8c4f · 2025-09-27T15:10:09.000+08:00
diff --git a/agentlightning/runner.py b/agentlightning/runner.py
@@ -240,9 +240,9 @@ async def run_async(self) -> bool:
                     # Pass the task input, not the whole task object
                     result = await rollout_method(task.input, task.rollout_id, resources_update.resources)
                     #降低最大rollout
-                    if len(result) > 5:
+                    if len(result) > 40:
                         import random
-                        result = random.sample(result,5)
+                        result = random.sample(result,40)
                     rollout_obj = self._to_rollout_object(result, task.rollout_id)
                     end_time = time.time()
                     logger.info(
@@ -254,11 +254,16 @@ async def run_async(self) -> bool:
                 logger.exception(f"{self._log_prefix(rollout_id)} Exception during rollout.")
                 MAX_TRY = MAX_TRY - 1
             finally:
-                try:
-                    self.agent.on_rollout_end(task, rollout_obj, self, self.tracer)
-                except Exception:
-                    logger.exception(f"{self._log_prefix(rollout_id)} Exception during on_rollout_end hook.")
-                await self.client.post_rollout_async(rollout_obj)
+                if rollout_obj.triplets:
+                    try:
+                        self.agent.on_rollout_end(task, rollout_obj, self, self.tracer)
+                    except Exception:
+                        logger.exception(f"{self._log_prefix(rollout_id)} Exception during on_rollout_end hook.")
+                    await self.client.post_rollout_async(rollout_obj)
+                else:
+                    print("Warning: error occured ,empty triplets")
+                    if MAX_TRY == 0:
+                        raise Exception("rollout_obj.triplets is EMPTY")
         return True
 
     async def iter_async(self) -> int:
diff --git a/agentlightning/verl/daemon.py b/agentlightning/verl/daemon.py
@@ -458,23 +458,8 @@ def get_train_data_batch(self, max_prompt_length, max_response_length, device):
             print(reward_list)
         n_transition = len(input_ids_list)
         print("***************************************",n_transition)
-
-        # # 直接扔掉多余的 transitions，限制最大数量(会报错)
-        # MAX_TRANSITIONS = 96 
-        # if n_transition > MAX_TRANSITIONS:
-        #     # 确保所有列表长度一致
-        #     input_ids_list = input_ids_list[:MAX_TRANSITIONS]
-        #     input_attention_mask_list = input_attention_mask_list[:MAX_TRANSITIONS]
-        #     response_ids_list = response_ids_list[:MAX_TRANSITIONS]
-        #     response_attention_mask_list = response_attention_mask_list[:MAX_TRANSITIONS]
-        #     reward_list = reward_list[:MAX_TRANSITIONS]
-        #     data_id_list = data_id_list[:MAX_TRANSITIONS]
-        #     rollout_id_list = rollout_id_list[:MAX_TRANSITIONS]
-        #     turn_index_list = turn_index_list[:MAX_TRANSITIONS]
-        #     is_drop_list = is_drop_list[:MAX_TRANSITIONS]
-            
-        #     n_transition = MAX_TRANSITIONS
-        #     print("********************MAX_TRANSITIONS*******************",n_transition)
+        if n_transition == 0:
+            raise Exception("Empty transitions !!!!!!!")
         batch_input_ids = torch.LongTensor(input_ids_list).to(device)
         input_attention_mask = torch.LongTensor(input_attention_mask_list).to(device)
         batch_response_ids = torch.LongTensor(response_ids_list).to(device)
diff --git a/examples/werewolf/train.sh b/examples/werewolf/train.sh
@@ -17,8 +17,8 @@ python -m agentlightning.verl \
     data.val_files=${DATA_DIR}/test.parquet \
     actor_rollout_ref.rollout.tensor_model_parallel_size=$ROLLOUT_TP_SIZE \
     trainer.n_gpus_per_node=${N_GPUS} \
-    data.train_batch_size=4 \
-    actor_rollout_ref.rollout.n=4 \
+    data.train_batch_size=1 \
+    actor_rollout_ref.rollout.n=1 \
     actor_rollout_ref.actor.ppo_mini_batch_size=8 \
     actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=1 \
     actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=1 \