@@ -906,11 +906,13 @@ def _consolidate_qkv_weights(
906
906
def _rename_weight_for_modelopt_checkpoint (self , name : str ) -> str :
907
907
"""Rename weights from ModelOpt llama4 fp8 checkpoints to vLLM
908
908
format."""
909
- if name .startswith ("model." ):
909
+ if name .startswith ("model." ) or name .startswith (
910
+ "language_model.model." ):
911
+ renamed = name .replace ("model." , "language_model.model." ,
912
+ 1 ) if name .startswith ("model." ) else name
910
913
# Handle expert scale parameters with flat naming
911
914
if "feed_forward.experts." in name and ("_input_scale" in name or
912
915
"_weight_scale" in name ):
913
- renamed = name .replace ("model." , "language_model.model." , 1 )
914
916
# Map checkpoint naming to vLLM's expected naming
915
917
if "down_proj_input_scale" in renamed :
916
918
return renamed .replace ("down_proj_input_scale" ,
@@ -929,15 +931,14 @@ def _rename_weight_for_modelopt_checkpoint(self, name: str) -> str:
929
931
# Handle attention scale parameters
930
932
elif "self_attn." in name and (".k_scale" in name
931
933
or ".v_scale" in name ):
932
- renamed = name .replace ("model." , "language_model.model." , 1 )
933
934
if ".k_proj.k_scale" in renamed :
934
935
return renamed .replace (".k_proj.k_scale" , ".attn.k_scale" )
935
936
elif ".v_proj.v_scale" in renamed :
936
937
return renamed .replace (".v_proj.v_scale" , ".attn.v_scale" )
937
938
return renamed
938
939
939
940
# Standard model.* to language_model.model.* renaming
940
- return name . replace ( "model." , "language_model.model." , 1 )
941
+ return renamed
941
942
942
943
elif name .startswith ("lm_head.weight" ):
943
944
return name .replace ("lm_head.weight" ,
0 commit comments