@@ -1122,6 +1122,7 @@ class TensorNameMap:
             "vision_encoder.patch_conv",  # pixtral
             "vision_model.patch_embedding.linear",  # llama 4
             "visual.patch_embed.proj",  # qwen2vl
+            "vision_tower.patch_embed.proj",  # kimi-vl
         ),
 
         MODEL_TENSOR.V_ENC_EMBD_POS: (
@@ -1130,6 +1131,7 @@ class TensorNameMap:
             "vpm.embeddings.position_embedding",
             "model.vision_model.embeddings.position_embedding",  # SmolVLM
             "vision_model.positional_embedding_vlm",  # llama 4
+            "vision_tower.patch_embed.pos_emb",  # kimi-vl
         ),
 
         MODEL_TENSOR.V_ENC_ATTN_Q: (
@@ -1141,6 +1143,7 @@ class TensorNameMap:
             "vision_tower.transformer.layers.{bid}.attention.q_proj",  # pixtral-hf
             "vision_encoder.transformer.layers.{bid}.attention.wq",  # pixtral
             "visual.blocks.{bid}.attn.q",  # qwen2vl, generated
+            "vision_tower.encoder.blocks.{bid}.wq",  # kimi-vl, generated
         ),
 
         MODEL_TENSOR.V_ENC_ATTN_Q_NORM: (
@@ -1157,6 +1160,7 @@ class TensorNameMap:
             "vision_tower.transformer.layers.{bid}.attention.k_proj",  # pixtral-hf
             "vision_encoder.transformer.layers.{bid}.attention.wk",  # pixtral
             "visual.blocks.{bid}.attn.k",  # qwen2vl, generated
+            "vision_tower.encoder.blocks.{bid}.wk",  # kimi-vl, generated
         ),
 
         MODEL_TENSOR.V_ENC_ATTN_K_NORM: (
@@ -1173,6 +1177,7 @@ class TensorNameMap:
             "vision_tower.transformer.layers.{bid}.attention.v_proj",  # pixtral-hf
             "vision_encoder.transformer.layers.{bid}.attention.wv",  # pixtral
             "visual.blocks.{bid}.attn.v",  # qwen2vl, generated
+            "vision_tower.encoder.blocks.{bid}.wv",  # kimi-vl, generated
         ),
 
         MODEL_TENSOR.V_ENC_INPUT_NORM: (
@@ -1185,6 +1190,7 @@ class TensorNameMap:
             "vision_encoder.transformer.layers.{bid}.attention_norm",  # pixtral
             "vision_model.model.layers.{bid}.input_layernorm",  # llama4
             "visual.blocks.{bid}.norm1",  # qwen2vl
+            "vision_tower.encoder.blocks.{bid}.norm0",  # kimi-vl (norm0/norm1)
         ),
 
         MODEL_TENSOR.V_ENC_ATTN_O: (
@@ -1197,6 +1203,7 @@ class TensorNameMap:
             "vision_tower.transformer.layers.{bid}.attention.o_proj",  # pixtral-hf
             "vision_encoder.transformer.layers.{bid}.attention.wo",  # pixtral
             "visual.blocks.{bid}.attn.proj",  # qwen2vl
+            "vision_tower.encoder.blocks.{bid}.wo",  # kimi-vl
         ),
 
         MODEL_TENSOR.V_ENC_POST_ATTN_NORM: (
@@ -1209,6 +1216,7 @@ class TensorNameMap:
             "vision_tower.transformer.layers.{bid}.ffn_norm",  # pixtral-hf
             "vision_encoder.transformer.layers.{bid}.ffn_norm",  # pixtral
             "visual.blocks.{bid}.norm2",  # qwen2vl
+            "vision_tower.encoder.blocks.{bid}.norm1",  # kimi-vl (norm0/norm1)
         ),
 
         MODEL_TENSOR.V_ENC_FFN_UP: (
@@ -1221,6 +1229,7 @@ class TensorNameMap:
             "vision_model.model.layers.{bid}.mlp.fc1",  # llama4
             "visual.blocks.{bid}.mlp.fc1",  # qwen2vl
             "visual.blocks.{bid}.mlp.up_proj",  # qwen2.5vl
+            "vision_tower.encoder.blocks.{bid}.mlp.fc0",  # kimi-vl (fc0/fc1)
         ),
 
         MODEL_TENSOR.V_ENC_FFN_GATE: (
@@ -1239,6 +1248,7 @@ class TensorNameMap:
             "vision_model.model.layers.{bid}.mlp.fc2",  # llama4
             "visual.blocks.{bid}.mlp.fc2",  # qwen2vl
             "visual.blocks.{bid}.mlp.down_proj",  # qwen2.5vl
+            "vision_tower.encoder.blocks.{bid}.mlp.fc1",  # kimi-vl (fc0/fc1)
         ),
 
         MODEL_TENSOR.V_LAYER_SCALE_1: (
@@ -1263,6 +1273,7 @@ class TensorNameMap:
             "model.vision_model.post_layernorm",  # SmolVLM
             "vision_model.layernorm_post",  # llama4
             "visual.merger.ln_q",  # qwen2vl
+            "vision_tower.encoder.final_layernorm",  # kimi-vl
         ),
 
         MODEL_TENSOR.V_MM_INP_PROJ: (
@@ -1272,6 +1283,7 @@ class TensorNameMap:
         MODEL_TENSOR.V_MM_INP_NORM: (
             "multi_modal_projector.norm",
             "multi_modal_projector.layer_norm",
+            "multi_modal_projector.pre_norm",
             "pre_mm_projector_norm",
         ),
 
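For context, these entries are consumed by the converter via `gguf.get_tensor_name_map()`, which expands the `{bid}` placeholder per block and resolves checkpoint tensor names to their canonical GGUF names. A minimal sketch of that lookup, assuming the `MMPROJ` arch and an arbitrary block count (both illustrative; the real converter takes them from the model's config):

```python
import gguf

# Build the name-mapping table; arch and block count are assumptions here,
# not values read from the Kimi-VL config.
tensor_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.MMPROJ, 27)

# A Kimi-VL checkpoint name now resolves through the
# "vision_tower.encoder.blocks.{bid}.wq" entry added above;
# try_suffixes carries the ".weight"/".bias" suffix through the lookup.
hf_name = "vision_tower.encoder.blocks.0.wq.weight"
print(tensor_map.get_name(hf_name, try_suffixes=(".weight", ".bias")))
# expected: the canonical GGUF name for V_ENC_ATTN_Q of block 0
```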