add message for dapo

hiyuchang · hiyuchang · commit b0577ed9e70c · 2025-11-13T16:11:39.000+08:00
diff --git a/examples/dapo_math/README.md b/examples/dapo_math/README.md
@@ -1,5 +1,5 @@
 # DAPO on DAPO-MATH-17k dataset [WIP]
 
-This example shows the usage of DAPO on the [DAPO-MATH-17k](https://huggingface.co/datasets/open-r1/DAPO-Math-17k-Processed) dataset.
+Note: this example currently demonstrates only GRPO on the [DAPO-MATH-17k](https://huggingface.co/datasets/open-r1/DAPO-Math-17k-Processed) dataset. We plan to implement the DAPO algorithm soon.
 
 The config file is located in [`dapo.yaml`](dapo.yaml).
diff --git a/examples/dapo_math/dapo.yaml b/examples/dapo_math/dapo.yaml
@@ -2,7 +2,7 @@ project: Trinity-RFT-example
 name: dapo
 checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}
 model:
-  model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
+  model_path: ${oc.env:TRINITY_MODEL_PATH} # A larger model is recommended for this dataset
   max_response_tokens: 20480
   max_model_len: 21504
 algorithm:
diff --git a/trinity/common/verl_config.py b/trinity/common/verl_config.py
@@ -81,6 +81,7 @@ class FSDPConfig:
     wrap_policy: WrapPolicy = field(default_factory=WrapPolicy)
     fsdp_size: int = -1
     forward_prefetch: bool = False
+    model_dtype: Optional[str] = "fp32"
 
 
 @dataclass