Skip to content

Commit 0f72fdd

Browse files
committed
[bugfix] fix internvl new_special_tokens (#5401)
1 parent deec84f commit 0f72fdd

File tree

2 files changed

+17
-14
lines changed

2 files changed

+17
-14
lines changed

swift/llm/model/patcher.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -150,22 +150,21 @@ def _check_imports(filename) -> List[str]:
150150
td.check_imports = _old_check_imports
151151

152152

153-
def get_lm_head_model(model, model_meta, lm_heads):
153+
def get_lm_head_model(model, model_meta=None, lm_heads=None):
154+
model_meta = model_meta or model.model_meta
155+
lm_heads = lm_heads or ['lm_head']
154156
llm_prefix_list = getattr(model_meta.model_arch, 'language_model', None)
155157
prefix_list = []
156158
if llm_prefix_list:
157159
prefix_list = llm_prefix_list[0].split('.')
158160

159-
origin_model = model
160161
current_model = model
161-
for prefix in [None] + prefix_list:
162-
if prefix:
163-
current_model = getattr(current_model, prefix)
162+
for prefix in prefix_list:
163+
current_model = getattr(current_model, prefix)
164164
for lm_head in lm_heads:
165165
if hasattr(current_model, lm_head):
166166
return current_model
167-
168-
raise ValueError(f'Cannot find the lm_head. model: {origin_model}')
167+
return model
169168

170169

171170
def _patch_sequence_classification(model, model_meta):

swift/llm/model/register.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@
2222

2323
from swift.utils import get_dist_setting, get_logger, is_mp, is_unsloth_available, patch_getattr
2424
from .constant import ModelType
25-
from .patcher import (patch_automodel, patch_automodel_for_sequence_classification, patch_get_dynamic_module,
26-
patch_mp_ddp, patch_tp_plan)
25+
from .patcher import (get_lm_head_model, patch_automodel, patch_automodel_for_sequence_classification,
26+
patch_get_dynamic_module, patch_mp_ddp, patch_tp_plan)
2727
from .utils import AttnImpl, HfConfigFactory, InitModelStrategy, ModelInfo, safe_snapshot_download
2828

2929
GetModelTokenizerFunction = Callable[..., Tuple[Optional[PreTrainedModel], PreTrainedTokenizerBase]]
@@ -665,11 +665,15 @@ def get_model_tokenizer(
665665
num_new_tokens = tokenizer.add_special_tokens({'additional_special_tokens': new_special_tokens})
666666
if num_new_tokens > 0:
667667
logger.info(f'Added {num_new_tokens} new special tokens.')
668-
if model is not None and model.config.vocab_size < len(tokenizer):
669-
vocab_size = math.ceil(len(tokenizer) / 128) * 128
670-
model.resize_token_embeddings(vocab_size)
671-
# fix transformers==4.52.4 qwen2.5-vl
672-
model.config.vocab_size = vocab_size
668+
669+
if model is not None:
670+
llm_model = get_lm_head_model(model, model_meta)
671+
origin_vocab_size = HfConfigFactory.get_config_attr(llm_model.config, 'vocab_size')
672+
if origin_vocab_size < len(tokenizer):
673+
vocab_size = math.ceil(len(tokenizer) / 128) * 128
674+
llm_model.resize_token_embeddings(vocab_size)
675+
# fix transformers==4.52.4 qwen2.5-vl
676+
HfConfigFactory.set_config_attr(llm_model.config, 'vocab_size', vocab_size)
673677

674678
problem_type = kwargs.get('problem_type')
675679
if problem_type is None and model_info.num_labels == 1:

0 commit comments

Comments (0)