@@ -900,10 +900,12 @@ class TensorNameMap:
 
         MODEL_TENSOR.V_MMPROJ_FC: (
             "model.connector.modality_projection.proj", # SmolVLM
+            "multi_modal_projector.linear_1", # llama 4
         ),
 
         MODEL_TENSOR.V_MMPROJ_MLP: (
             "model.mm_projector.mlp.mlp.{bid}",
+            "vision_model.vision_adapter.mlp.fc{bid}.weight", # llama 4
         ),
 
         MODEL_TENSOR.V_MMPROJ_PEG: (
@@ -912,39 +914,45 @@ class TensorNameMap:
 
         MODEL_TENSOR.V_ENC_EMBD_CLS: (
             "vision_tower.vision_model.embeddings.class_embedding",
+            "vision_model.class_embedding", # llama 4
         ),
 
         MODEL_TENSOR.V_ENC_EMBD_PATCH: (
             "vision_tower.vision_model.embeddings.patch_embedding",
             "vpm.embeddings.patch_embedding",
             "model.vision_model.embeddings.patch_embedding", # SmolVLM
             "vision_tower.patch_conv", # pixtral
+            "vision_model.patch_embedding.linear", # llama 4
         ),
 
         MODEL_TENSOR.V_ENC_EMBD_POS: (
             "vision_tower.vision_model.embeddings.position_embedding",
             "vpm.embeddings.position_embedding",
             "model.vision_model.embeddings.position_embedding", # SmolVLM
+            "vision_model.positional_embedding_vlm", # llama 4
         ),
 
         MODEL_TENSOR.V_ENC_ATTN_Q: (
             "vision_tower.vision_model.encoder.layers.{bid}.self_attn.q_proj",
             "vpm.encoder.layers.{bid}.self_attn.q_proj",
             "model.vision_model.encoder.layers.{bid}.self_attn.q_proj", # SmolVLM
+            "vision_model.model.layers.{bid}.self_attn.q_proj", # llama4
             "vision_tower.transformer.layers.{bid}.attention.q_proj", # pixtral
         ),
 
         MODEL_TENSOR.V_ENC_ATTN_K: (
             "vision_tower.vision_model.encoder.layers.{bid}.self_attn.k_proj",
             "vpm.encoder.layers.{bid}.self_attn.k_proj",
             "model.vision_model.encoder.layers.{bid}.self_attn.k_proj", # SmolVLM
+            "vision_model.model.layers.{bid}.self_attn.k_proj", # llama4
             "vision_tower.transformer.layers.{bid}.attention.k_proj", # pixtral
         ),
 
         MODEL_TENSOR.V_ENC_ATTN_V: (
             "vision_tower.vision_model.encoder.layers.{bid}.self_attn.v_proj",
             "vpm.encoder.layers.{bid}.self_attn.v_proj",
             "model.vision_model.encoder.layers.{bid}.self_attn.v_proj", # SmolVLM
+            "vision_model.model.layers.{bid}.self_attn.v_proj", # llama4
             "vision_tower.transformer.layers.{bid}.attention.v_proj", # pixtral
         ),
 
@@ -953,19 +961,22 @@ class TensorNameMap:
             "vpm.encoder.layers.{bid}.layer_norm1",
             "model.vision_model.encoder.layers.{bid}.layer_norm1", # SmolVLM
             "vision_tower.transformer.layers.{bid}.attention_norm", # pixtral
+            "vision_model.model.layers.{bid}.input_layernorm", # llama4
         ),
 
-        MODEL_TENSOR.V_ENC_OUTPUT: (
+        MODEL_TENSOR.V_ENC_ATTN_O: (
             "vision_tower.vision_model.encoder.layers.{bid}.self_attn.out_proj",
             "vpm.encoder.layers.{bid}.self_attn.out_proj",
             "model.vision_model.encoder.layers.{bid}.self_attn.out_proj", # SmolVLM
+            "vision_model.model.layers.{bid}.self_attn.o_proj", # llama4
             "vision_tower.transformer.layers.{bid}.attention.o_proj", # pixtral
         ),
 
-        MODEL_TENSOR.V_ENC_OUTPUT_NORM: (
+        MODEL_TENSOR.V_ENC_POST_ATTN_NORM: (
             "vision_tower.vision_model.encoder.layers.{bid}.layer_norm2",
             "vpm.encoder.layers.{bid}.layer_norm2",
             "model.vision_model.encoder.layers.{bid}.layer_norm2", # SmolVLM
+            "vision_model.model.layers.{bid}.post_attention_layernorm", # llama4
             "vision_tower.transformer.layers.{bid}.ffn_norm", # pixtral
         ),
 
@@ -974,6 +985,7 @@ class TensorNameMap:
             "vpm.encoder.layers.{bid}.mlp.fc1",
             "model.vision_model.encoder.layers.{bid}.mlp.fc2", # SmolVLM, gemma3 (note: name is swapped)
             "vision_tower.transformer.layers.{bid}.feed_forward.up_proj", # pixtral
+            "vision_model.model.layers.{bid}.mlp.fc1", # llama4
         ),
 
         MODEL_TENSOR.V_ENC_FFN_GATE: (
@@ -985,16 +997,19 @@ class TensorNameMap:
             "vpm.encoder.layers.{bid}.mlp.fc2",
             "model.vision_model.encoder.layers.{bid}.mlp.fc1", # SmolVLM, gemma3 (note: name is swapped)
             "vision_tower.transformer.layers.{bid}.feed_forward.down_proj", # pixtral
+            "vision_model.model.layers.{bid}.mlp.fc2", # llama4
         ),
 
         MODEL_TENSOR.V_PRE_NORM: (
             "vision_tower.vision_model.pre_layrnorm",
             "vision_tower.ln_pre", # pixtral
+            "vision_model.layernorm_pre", # llama4
         ),
 
         MODEL_TENSOR.V_POST_NORM: (
             "vision_tower.vision_model.post_layernorm",
             "model.vision_model.post_layernorm", # SmolVLM
+            "vision_model.layernorm_post", # llama4
         ),
 
         MODEL_TENSOR.V_MM_INP_PROJ: (
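Note (not part of the diff): each entry above is a name template keyed by a `MODEL_TENSOR` type; at convert time gguf-py expands the `{bid}` placeholder for every block index and matches checkpoint tensor names against the expanded templates. The sketch below illustrates that lookup with a two-entry excerpt from this diff. It is a minimal, self-contained illustration, not the actual gguf-py `TensorNameMap` implementation; the helper names (`build_reverse_map`, `lookup`) and the `n_blocks` value are made up for the example.

```python
from __future__ import annotations

# Tiny excerpt of the mapping, in the same shape as the diff above.
MAPPINGS: dict[str, tuple[str, ...]] = {
    "V_ENC_ATTN_Q": (
        "vision_tower.vision_model.encoder.layers.{bid}.self_attn.q_proj",
        "vision_model.model.layers.{bid}.self_attn.q_proj", # llama4
    ),
    "V_ENC_ATTN_O": (
        "vision_tower.vision_model.encoder.layers.{bid}.self_attn.out_proj",
        "vision_model.model.layers.{bid}.self_attn.o_proj", # llama4
    ),
}

def build_reverse_map(n_blocks: int) -> dict[str, tuple[str, int]]:
    """Expand {bid} for every block index and key by the concrete HF base name."""
    rev: dict[str, tuple[str, int]] = {}
    for tensor_type, templates in MAPPINGS.items():
        for tmpl in templates:
            for bid in range(n_blocks):
                rev[tmpl.format(bid=bid)] = (tensor_type, bid)
    return rev

def lookup(rev: dict[str, tuple[str, int]], hf_name: str,
           suffixes: tuple[str, ...] = (".weight", ".bias")) -> tuple[str, int, str] | None:
    """Strip a known suffix, then look the base name up in the reverse map."""
    for suffix in suffixes:
        if hf_name.endswith(suffix):
            hit = rev.get(hf_name[: -len(suffix)])
            if hit is not None:
                return (*hit, suffix)
    return None

if __name__ == "__main__":
    rev = build_reverse_map(n_blocks=34)  # block count is illustrative
    # A llama 4 vision encoder tensor resolves through the newly added template:
    print(lookup(rev, "vision_model.model.layers.3.self_attn.q_proj.weight"))
    # -> ('V_ENC_ATTN_Q', 3, '.weight')
```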