Fix fully async example: return aborted groups to the data buffer instead of training on them; switch example to DAPO reward (issue #488) (#519)

Lez-3f · web-flow · commit 6d846620bb60 · 2025-10-20T12:01:39.000+08:00
diff --git a/examples/fully_async/fully_async_rollout.py b/examples/fully_async/fully_async_rollout.py
@@ -193,6 +193,23 @@ async def generate_rollout_async(args, rollout_id: int, data_buffer) -> List[Lis
 
             group = completed_groups.pop(group_id)
 
+            # If any sample in the group was aborted, return the whole group to the data buffer
+            # and do not forward it to the training engine.
+            try:
+                any_aborted = any([sample.status == Sample.Status.ABORTED for sample in group])
+            except Exception:
+                any_aborted = False
+
+            if any_aborted:
+                try:
+                    # add back to buffer so it can be retried or handled by buffer policy
+                    data_buffer.add_samples([group])
+                    print(f"Returned aborted group {group_id} to data buffer", flush=True)
+                except Exception as e:
+                    print(f"Failed to return aborted group {group_id} to buffer: {e}", flush=True)
+                # don't count as processed for training
+                continue
+
             if do_print:
                 print(
                     f"First rollout sample: {[group[0].prompt + group[0].response]}, "
diff --git a/examples/fully_async/run-qwen3-4b-fully_async.sh b/examples/fully_async/run-qwen3-4b-fully_async.sh
@@ -35,14 +35,19 @@ CKPT_ARGS=(
    --save-interval 20
 )
 
+PROMPT_SET=/path/to/dapo-math-17k.jsonl
+
 ROLLOUT_ARGS=(
    --rollout-function-path fully_async_rollout.generate_rollout_fully_async
-   --prompt-data /mnt/o1_alicloud/personal/zzl/rl_data/dapo-math-17k.jsonl
+   --prompt-data ${PROMPT_SET}
    --input-key prompt
    --label-key label
    --apply-chat-template
    --rollout-shuffle
-   --rm-type deepscaler
+
+   --rm-type dapo
+   --reward-key score
+
    --num-rollout 3000
    --rollout-batch-size 32
    --n-samples-per-prompt 8
diff --git a/slime/rollout/rm_hub/math_dapo_utils.py b/slime/rollout/rm_hub/math_dapo_utils.py
@@ -136,6 +136,8 @@ def __exit__(self, type, value, traceback):
     "{,}",
     '"',
     "\\dots",
+    "<|im_end|>",
+    "<|endoftext|>",
 ]
 
 
@@ -206,6 +208,8 @@ def is_correct_minerva(
     else:
         gt = normalize_final_answer(gt)
 
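+    gt = str(int(float(gt)))  # DAPO ground-truth answers are all integers: canonicalize e.g. "72.0" -> "72". NOTE(review): float() raises ValueError on a non-numeric gt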
+
     return (pred == gt), pred