@@ -1110,11 +1110,13 @@ class TensorNameMap:
 
         MODEL_TENSOR.V_ENC_EMBD_CLS: (
             "vision_tower.vision_model.embeddings.class_embedding",
+            "model.vision_tower.embeddings.cls_token", # Intern-S1
             "vision_model.class_embedding", # llama 4
         ),
 
         MODEL_TENSOR.V_ENC_EMBD_PATCH: (
             "vision_tower.vision_model.embeddings.patch_embedding",
+            "model.vision_tower.embeddings.patch_embeddings.projection", # Intern-S1
             "vpm.embeddings.patch_embedding",
             "model.vision_model.embeddings.patch_embedding", # SmolVLM
             "vision_tower.patch_conv", # pixtral
@@ -1124,13 +1126,15 @@ class TensorNameMap:
 
         MODEL_TENSOR.V_ENC_EMBD_POS: (
             "vision_tower.vision_model.embeddings.position_embedding",
+            "model.vision_tower.embeddings.position_embeddings", # Intern-S1
             "vpm.embeddings.position_embedding",
             "model.vision_model.embeddings.position_embedding", # SmolVLM
             "vision_model.positional_embedding_vlm", # llama 4
         ),
 
         MODEL_TENSOR.V_ENC_ATTN_Q: (
             "vision_tower.vision_model.encoder.layers.{bid}.self_attn.q_proj",
+            "model.vision_tower.encoder.layer.{bid}.attention.q_proj", # Intern-S1
             "vpm.encoder.layers.{bid}.self_attn.q_proj",
             "model.vision_model.encoder.layers.{bid}.self_attn.q_proj", # SmolVLM
             "vision_model.model.layers.{bid}.self_attn.q_proj", # llama4
@@ -1140,10 +1144,12 @@ class TensorNameMap:
 
         MODEL_TENSOR.V_ENC_ATTN_Q_NORM: (
             "vision_tower.vision_model.encoder.layers.{bid}.attn.q_norm", # InternVL
+            "model.vision_tower.encoder.layer.{bid}.attention.q_norm", # Intern-S1
         ),
 
         MODEL_TENSOR.V_ENC_ATTN_K: (
             "vision_tower.vision_model.encoder.layers.{bid}.self_attn.k_proj",
+            "model.vision_tower.encoder.layer.{bid}.attention.k_proj", # Intern-S1
             "vpm.encoder.layers.{bid}.self_attn.k_proj",
             "model.vision_model.encoder.layers.{bid}.self_attn.k_proj", # SmolVLM
             "vision_model.model.layers.{bid}.self_attn.k_proj", # llama4
@@ -1153,10 +1159,12 @@ class TensorNameMap:
 
         MODEL_TENSOR.V_ENC_ATTN_K_NORM: (
             "vision_tower.vision_model.encoder.layers.{bid}.attn.k_norm", # InternVL
+            "model.vision_tower.encoder.layer.{bid}.attention.k_norm", # Intern-S1
         ),
 
         MODEL_TENSOR.V_ENC_ATTN_V: (
             "vision_tower.vision_model.encoder.layers.{bid}.self_attn.v_proj",
+            "model.vision_tower.encoder.layer.{bid}.attention.v_proj", # Intern-S1
             "vpm.encoder.layers.{bid}.self_attn.v_proj",
             "model.vision_model.encoder.layers.{bid}.self_attn.v_proj", # SmolVLM
             "vision_model.model.layers.{bid}.self_attn.v_proj", # llama4
@@ -1167,6 +1175,7 @@ class TensorNameMap:
         MODEL_TENSOR.V_ENC_INPUT_NORM: (
             "vision_tower.vision_model.encoder.layers.{bid}.layer_norm1",
             "vision_tower.vision_model.encoder.layers.{bid}.norm1", # InternVL
+            "model.vision_tower.encoder.layer.{bid}.layernorm_before", # Intern-S1
             "vpm.encoder.layers.{bid}.layer_norm1",
             "model.vision_model.encoder.layers.{bid}.layer_norm1", # SmolVLM
             "vision_tower.transformer.layers.{bid}.attention_norm", # pixtral
@@ -1177,6 +1186,7 @@ class TensorNameMap:
         MODEL_TENSOR.V_ENC_ATTN_O: (
             "vision_tower.vision_model.encoder.layers.{bid}.self_attn.out_proj",
             "vision_tower.vision_model.encoder.layers.{bid}.attn.proj", # InternVL
+            "model.vision_tower.encoder.layer.{bid}.attention.projection_layer", # Intern-S1
             "vpm.encoder.layers.{bid}.self_attn.out_proj",
             "model.vision_model.encoder.layers.{bid}.self_attn.out_proj", # SmolVLM
             "vision_model.model.layers.{bid}.self_attn.o_proj", # llama4
@@ -1187,6 +1197,7 @@ class TensorNameMap:
         MODEL_TENSOR.V_ENC_POST_ATTN_NORM: (
             "vision_tower.vision_model.encoder.layers.{bid}.layer_norm2",
             "vision_tower.vision_model.encoder.layers.{bid}.norm2", # InternVL
+            "model.vision_tower.encoder.layer.{bid}.layernorm_after", # Intern-S1
             "vpm.encoder.layers.{bid}.layer_norm2",
             "model.vision_model.encoder.layers.{bid}.layer_norm2", # SmolVLM
             "vision_model.model.layers.{bid}.post_attention_layernorm", # llama4
@@ -1196,6 +1207,7 @@ class TensorNameMap:
 
         MODEL_TENSOR.V_ENC_FFN_UP: (
             "vision_tower.vision_model.encoder.layers.{bid}.mlp.fc1",
+            "model.vision_tower.encoder.layer.{bid}.mlp.fc1", # Intern-S1
             "vpm.encoder.layers.{bid}.mlp.fc1",
             "model.vision_model.encoder.layers.{bid}.mlp.fc1", # SmolVLM, gemma3
             "vision_tower.transformer.layers.{bid}.feed_forward.up_proj", # pixtral
@@ -1211,6 +1223,7 @@ class TensorNameMap:
 
         MODEL_TENSOR.V_ENC_FFN_DOWN: (
             "vision_tower.vision_model.encoder.layers.{bid}.mlp.fc2",
+            "model.vision_tower.encoder.layer.{bid}.mlp.fc2", # Intern-S1
             "vpm.encoder.layers.{bid}.mlp.fc2",
             "model.vision_model.encoder.layers.{bid}.mlp.fc2", # SmolVLM, gemma3
             "vision_tower.transformer.layers.{bid}.feed_forward.down_proj", # pixtral
@@ -1221,10 +1234,12 @@ class TensorNameMap:
 
         MODEL_TENSOR.V_LAYER_SCALE_1: (
             "vision_tower.vision_model.encoder.layers.{bid}.ls1", # InternVL
+            "model.vision_tower.encoder.layer.{bid}.lambda_1", # Intern-S1
         ),
 
         MODEL_TENSOR.V_LAYER_SCALE_2: (
             "vision_tower.vision_model.encoder.layers.{bid}.ls2", # InternVL
+            "model.vision_tower.encoder.layer.{bid}.lambda_2", # Intern-S1
         ),
 
         MODEL_TENSOR.V_PRE_NORM: (
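
Each tuple above lists the Hugging Face tensor names that should resolve to a single GGUF tensor type, with `{bid}` standing for the encoder block index that is substituted when the map is built; the diff simply adds the Intern-S1 spellings of those names alongside the existing InternVL/SmolVLM/pixtral/llama4 ones. The snippet below is a minimal, self-contained sketch of that lookup technique, not the real `gguf.TensorNameMap` implementation: `BLOCK_MAPPINGS`, `build_lookup`, and `resolve` are illustrative names invented here, and only a hand-copied subset of the Intern-S1 entries is included.

```python
from __future__ import annotations

# Illustrative subset of the mappings added in this diff.
# Keys are symbolic tensor types; values are candidate Hugging Face names,
# with {bid} standing for the vision-encoder block index.
BLOCK_MAPPINGS: dict[str, tuple[str, ...]] = {
    "V_ENC_ATTN_Q": (
        "vision_tower.vision_model.encoder.layers.{bid}.self_attn.q_proj",
        "model.vision_tower.encoder.layer.{bid}.attention.q_proj",  # Intern-S1
    ),
    "V_LAYER_SCALE_1": (
        "vision_tower.vision_model.encoder.layers.{bid}.ls1",       # InternVL
        "model.vision_tower.encoder.layer.{bid}.lambda_1",          # Intern-S1
    ),
}

def build_lookup(n_blocks: int) -> dict[str, tuple[str, int]]:
    """Expand {bid} for every block and map each HF name to (tensor type, block index)."""
    lookup: dict[str, tuple[str, int]] = {}
    for tensor_type, candidates in BLOCK_MAPPINGS.items():
        for template in candidates:
            for bid in range(n_blocks):
                lookup[template.format(bid=bid)] = (tensor_type, bid)
    return lookup

def resolve(lookup: dict[str, tuple[str, int]], hf_name: str,
            suffixes: tuple[str, ...] = (".weight", ".bias")) -> tuple[str, int] | None:
    """Try the bare name first, then with common suffixes stripped."""
    if hf_name in lookup:
        return lookup[hf_name]
    for suffix in suffixes:
        if hf_name.endswith(suffix) and hf_name[: -len(suffix)] in lookup:
            return lookup[hf_name[: -len(suffix)]]
    return None

if __name__ == "__main__":
    lookup = build_lookup(n_blocks=2)
    # An Intern-S1 checkpoint name resolves to the same tensor type as the InternVL name:
    print(resolve(lookup, "model.vision_tower.encoder.layer.1.attention.q_proj.weight"))
    # -> ('V_ENC_ATTN_Q', 1)
```

With the new entries in place, an Intern-S1 checkpoint and an InternVL checkpoint land on the same GGUF tensor names during conversion, so no model-specific renaming logic is needed downstream.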