[MTP] Support RL reshard (#4074)

freeliuzc · web-flow · commit d2ab36942787 · 2025-09-15T11:47:06.000+08:00
* Support RL reshard

* Modify model name
diff --git a/fastdeploy/config.py b/fastdeploy/config.py
@@ -132,6 +132,7 @@ def __init__(
         self.eos_tokens_lens: int = 2
         self.lm_head_fp32: bool = False
         self.model_format = "auto"
+        self.num_nextn_predict_layers = 0
         for key, value in args.items():
             if hasattr(self, key):
                 setattr(self, key, value)
diff --git a/fastdeploy/multimodal/registry.py b/fastdeploy/multimodal/registry.py
@@ -24,6 +24,7 @@ class MultimodalRegistry:
         "Ernie4_5_VLMoeForConditionalGeneration",
         "Ernie5MoeForCausalLM",
         "Qwen2_5_VLForConditionalGeneration",
+        "Ernie5ForCausalLM",
     }
 
     @classmethod
diff --git a/fastdeploy/rl/rollout_config.py b/fastdeploy/rl/rollout_config.py
@@ -61,6 +61,7 @@ def __init__(
         local_rank: int = 0,
         moba_attention_config: str = None,
         data_parallel_size: int = 1,
+        num_nextn_predict_layers: int = 0,
     ):
         # Required parameters
         self.model = model_name_or_path
@@ -107,6 +108,7 @@ def __init__(
         self.early_stop_config = early_stop_config
         self.ips = None
         self.moba_attention_config = moba_attention_config
+        self.num_nextn_predict_layers = num_nextn_predict_layers
 
     def __str__(self):
         return "\n".join(f"{k}: {v}" for k, v in self.__dict__.items())

Original file line number	Diff line number	Diff line change
`@@ -24,6 +24,7 @@ class MultimodalRegistry:`
`24`	`24`	`"Ernie4_5_VLMoeForConditionalGeneration",`
`25`	`25`	`"Ernie5MoeForCausalLM",`
`26`	`26`	`"Qwen2_5_VLForConditionalGeneration",`
	`27`	`+ "Ernie5ForCausalLM",`
`27`	`28`	`}`
`28`	`29`
`29`	`30`	`@classmethod`