Skip to content

Commit 13b2d8c

Browse files
committed
fix Yi-VL model
1 parent 9d4baa6 commit 13b2d8c

File tree

2 files changed

+15
-8
lines changed

2 files changed

+15
-8
lines changed

examples/llava/clip.cpp

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -817,8 +817,10 @@ static ggml_cgraph * clip_image_build_graph_legacy(clip_ctx * ctx, const clip_im
817817
embeddings = ggml_add(ctx0, embeddings, model.mm_0_b);
818818

819819
embeddings = ggml_gelu(ctx0, embeddings);
820-
embeddings = ggml_mul_mat(ctx0, model.mm_2_w, embeddings);
821-
embeddings = ggml_add(ctx0, embeddings, model.mm_2_b);
820+
if (model.mm_2_w) {
821+
embeddings = ggml_mul_mat(ctx0, model.mm_2_w, embeddings);
822+
embeddings = ggml_add(ctx0, embeddings, model.mm_2_b);
823+
}
822824
}
823825
else if (ctx->proj_type == PROJECTOR_TYPE_MLP_NORM) {
824826
embeddings = ggml_mul_mat(ctx0, model.mm_0_w, embeddings);
@@ -1356,6 +1358,10 @@ struct clip_model_loader {
13561358
vision_model.mm_3_b = get_tensor(string_format(TN_LLAVA_PROJ, 3, "bias"), false);
13571359
vision_model.mm_4_w = get_tensor(string_format(TN_LLAVA_PROJ, 4, "weight"), false);
13581360
vision_model.mm_4_b = get_tensor(string_format(TN_LLAVA_PROJ, 4, "bias"), false);
1361+
if (vision_model.mm_3_w) {
1362+
// TODO: this is a hack to support Yi-type llava
1363+
ctx_clip.proj_type = PROJECTOR_TYPE_MLP_NORM;
1364+
}
13591365
vision_model.image_newline = get_tensor(TN_IMAGE_NEWLINE, false);
13601366
} break;
13611367
case PROJECTOR_TYPE_LDP:

examples/llava/tests.sh

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -25,16 +25,17 @@ add_test() {
2525
arr_hf+=("$hf")
2626
}
2727

28-
add_test "llama-gemma3-cli" "ggml-org/gemma-3-4b-it-GGUF"
29-
add_test "llama-llava-cli" "guinmoon/MobileVLM-3B-GGUF"
30-
add_test "llama-llava-cli" "THUDM/glm-edge-v-5b-gguf"
28+
add_test "llama-gemma3-cli" "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M"
29+
add_test "llama-llava-cli" "cmp-nct/Yi-VL-6B-GGUF:Q5_K"
30+
add_test "llama-llava-cli" "guinmoon/MobileVLM-3B-GGUF:Q4_K_M"
31+
add_test "llama-llava-cli" "THUDM/glm-edge-v-5b-gguf:Q4_K_M"
3132
add_test "llama-llava-cli" "second-state/Llava-v1.5-7B-GGUF:Q2_K"
3233
add_test "llama-llava-cli" "cjpais/llava-1.6-mistral-7b-gguf:Q3_K"
33-
add_test "llama-llava-cli" "ibm-research/granite-vision-3.2-2b-GGUF"
34+
add_test "llama-llava-cli" "ibm-research/granite-vision-3.2-2b-GGUF:Q4_K_M"
3435
add_test "llama-minicpmv-cli" "second-state/MiniCPM-Llama3-V-2_5-GGUF:Q2_K" # model from openbmb is corrupted
3536
add_test "llama-minicpmv-cli" "openbmb/MiniCPM-V-2_6-gguf:Q2_K"
3637
add_test "llama-minicpmv-cli" "openbmb/MiniCPM-o-2_6-gguf:Q4_0"
37-
add_test "llama-qwen2vl-cli" "bartowski/Qwen2-VL-2B-Instruct-GGUF"
38+
add_test "llama-qwen2vl-cli" "bartowski/Qwen2-VL-2B-Instruct-GGUF:Q4_K_M"
3839

3940
###############
4041

@@ -50,7 +51,7 @@ for i in "${!arr_bin[@]}"; do
5051
echo ""
5152
echo ""
5253

53-
output=$("$PROJ_ROOT/build/bin/$bin" -hf "$hf" --image $SCRIPT_DIR/test-1.jpeg -p "what is the publisher name of the newspaper?" --temp 0 | tee /dev/tty)
54+
output=$("$PROJ_ROOT/build/bin/$bin" -hf "$hf" --image $SCRIPT_DIR/test-1.jpeg -p "what is the publisher name of the newspaper?" --temp 0 2>&1 | tee /dev/tty)
5455

5556
echo "$output" > $SCRIPT_DIR/output/$bin-$(echo "$hf" | tr '/' '-').log
5657

0 commit comments

Comments
 (0)