Commit 3386bd3

emb to fp32 only when share with lm_head
1 parent 5d03573 commit 3386bd3

File tree

4 files changed: +18 / -6 lines

lmdeploy/pytorch/config.py
lmdeploy/pytorch/engine/model_agent/agent.py
lmdeploy/pytorch/model_inputs.py
lmdeploy/pytorch/models/utils/model.py

lmdeploy/pytorch/config.py

Lines changed: 4 additions & 0 deletions
@@ -306,6 +306,7 @@ class ModelConfig:

    # fp32 lm head
    enforce_fp32_head: bool = False
+    tie_word_embeddings: bool = False

    def get_head_size(self):
        """Get head size."""

@@ -357,7 +358,10 @@ def from_pretrained(
        enforce_fp32_head = hf_overrides.pop('enforce_fp32_head', False)
        override_hf_config(model_config.hf_config, hf_overrides)

+        # for fp32 head
        model_config.enforce_fp32_head = enforce_fp32_head
+        model_config.tie_word_embeddings = getattr(hf_config, 'tie_word_embeddings', False)
+
        # for serialization of transformers modules
        maybe_register_config_serialize_by_value(trust_remote_code)
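
For context, the lookup in from_pretrained above is a plain getattr with a False fallback, so models whose Hugging Face config omits tie_word_embeddings keep the old behaviour. A minimal, self-contained sketch of that pattern; hf_config_tied and hf_config_plain are hypothetical stand-ins, not lmdeploy or transformers objects:

from types import SimpleNamespace

# Hypothetical stand-ins for a transformers-style config object; real ones
# would come from transformers.AutoConfig.from_pretrained(...).
hf_config_tied = SimpleNamespace(tie_word_embeddings=True)
hf_config_plain = SimpleNamespace()  # some configs omit the field entirely

# Same defaulting pattern as the diff: a missing field resolves to False.
print(getattr(hf_config_tied, 'tie_word_embeddings', False))   # True
print(getattr(hf_config_plain, 'tie_word_embeddings', False))  # False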

lmdeploy/pytorch/engine/model_agent/agent.py

Lines changed: 8 additions & 5 deletions
@@ -1040,11 +1040,14 @@ def _build_model(self):
        # for router replay
        enable_return_routed_experts = self.misc_config.enable_return_routed_experts and self.need_output

-        build_model_ctx = BuildModelContext(disable_vision_encoder=self.misc_config.disable_vision_encoder,
-                                            dllm_config=self.misc_config.dllm_config,
-                                            strategy_factory=self.strategy_factory,
-                                            enable_return_routed_experts=enable_return_routed_experts,
-                                            enforce_fp32_head=self.model_config.enforce_fp32_head)
+        build_model_ctx = BuildModelContext(
+            disable_vision_encoder=self.misc_config.disable_vision_encoder,
+            dllm_config=self.misc_config.dllm_config,
+            strategy_factory=self.strategy_factory,
+            enable_return_routed_experts=enable_return_routed_experts,
+            enforce_fp32_head=self.model_config.enforce_fp32_head,
+            tie_word_embeddings=self.model_config.tie_word_embeddings,
+        )
        patched_model = build_patched_model(self.model_config,
                                            device=device,
                                            model_format=self.misc_config.model_format,
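
The call site above now passes one keyword per line with a trailing comma, so adding a further flag later stays a one-line change. As a rough illustration of the threading, not the lmdeploy classes themselves, the sketch below uses hypothetical ToyModelConfig and ToyBuildContext dataclasses to show the new flag being copied from the model config into the context handed to the model builder:

from dataclasses import dataclass

# Hypothetical, trimmed-down stand-ins for ModelConfig and BuildModelContext.
@dataclass
class ToyModelConfig:
    enforce_fp32_head: bool = False
    tie_word_embeddings: bool = False

@dataclass
class ToyBuildContext:
    enforce_fp32_head: bool = False
    tie_word_embeddings: bool = False

model_config = ToyModelConfig(enforce_fp32_head=True, tie_word_embeddings=True)
build_model_ctx = ToyBuildContext(
    enforce_fp32_head=model_config.enforce_fp32_head,
    tie_word_embeddings=model_config.tie_word_embeddings,  # new flag threaded through
)
print(build_model_ctx)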

lmdeploy/pytorch/model_inputs.py

Lines changed: 1 addition & 0 deletions
@@ -390,6 +390,7 @@ class BuildModelContext:
    strategy_factory: 'StrategyFactoryBase' = None
    enable_return_routed_experts: bool = False
    enforce_fp32_head: bool = False
+    tie_word_embeddings: bool = False


class StepContextManager(CtxMgrBase[StepContext]):
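
Because the new BuildModelContext field carries a default of False, any existing call site that does not pass it keeps its previous behaviour. A short sketch with a hypothetical, reduced context class:

from dataclasses import dataclass

# Hypothetical, reduced version of BuildModelContext, for illustration only.
@dataclass
class ContextSketch:
    enforce_fp32_head: bool = False
    tie_word_embeddings: bool = False  # new field; default keeps old call sites valid

print(ContextSketch())                          # both flags default to False
print(ContextSketch(tie_word_embeddings=True))  # callers opt in explicitly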

lmdeploy/pytorch/models/utils/model.py

Lines changed: 5 additions & 1 deletion
@@ -126,7 +126,11 @@ def build_embedding(vocab_size: int,
    """Build embedding."""
    bm_ctx = get_build_model_context()

-    force_dtype = torch.float32 if bm_ctx.enforce_fp32_head else None
+    # run with fp32 only when share weights with lm_head
+    force_dtype = None
+    if bm_ctx.enforce_fp32_head and bm_ctx.tie_word_embeddings:
+        force_dtype = torch.float32
+
    return ParallelEmbedding(
        vocab_size,
        hidden_size,
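
The guard above is the core of the commit: with tie_word_embeddings set, the lm_head projection reuses the embedding matrix, so enforcing an fp32 head requires an fp32 embedding only in the tied case, while an untied embedding can stay in the model dtype. A hedged sketch of that intent, where pick_embedding_dtype is a hypothetical helper rather than the ParallelEmbedding code:

import torch
from torch import nn

def pick_embedding_dtype(enforce_fp32_head: bool,
                         tie_word_embeddings: bool,
                         model_dtype: torch.dtype = torch.bfloat16) -> torch.dtype:
    # Mirror of the new logic: promote the embedding to fp32 only when it is
    # the same tensor as the lm_head weight.
    if enforce_fp32_head and tie_word_embeddings:
        return torch.float32
    return model_dtype

emb = nn.Embedding(8, 4, dtype=pick_embedding_dtype(True, True))
lm_head = nn.Linear(4, 8, bias=False)
lm_head.weight = emb.weight          # weight tying: one tensor backs both modules
assert lm_head.weight is emb.weight  # the fp32 head comes straight from the embedding
assert lm_head.weight.dtype == torch.float32

print(pick_embedding_dtype(True, False))   # torch.bfloat16: untied embedding stays in model dtype
print(pick_embedding_dtype(False, True))   # torch.bfloat16: fp32 head was not requested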
