Commit 85764bd: "clean up"

1 parent f5cb848

1 file changed: +12 additions, -14 deletions

tools/mtmd/clip.cpp

Lines changed: 12 additions & 14 deletions
@@ -288,8 +288,6 @@ struct clip_model {
     // GLMV-Edge projection
     ggml_tensor * mm_model_adapter_conv_w = nullptr;
     ggml_tensor * mm_model_adapter_conv_b = nullptr;
-    ggml_tensor * mm_glm_tok_boi = nullptr;
-    ggml_tensor * mm_glm_tok_eoi = nullptr;

     // MobileVLM projection
     ggml_tensor * mm_model_mlp_1_w = nullptr;
@@ -1505,8 +1503,8 @@ struct clip_graph {
     // note: these embeddings are not present in text model, hence we cannot process them as text tokens
     // see: https://huggingface.co/THUDM/glm-edge-v-2b/blob/main/siglip.py#L53
     {
-        embeddings = ggml_concat(ctx0, model.mm_glm_tok_boi, embeddings, 1); // BOI
-        embeddings = ggml_concat(ctx0, embeddings, model.mm_glm_tok_eoi, 1); // EOI
+        embeddings = ggml_concat(ctx0, model.mm_boi, embeddings, 1); // BOI
+        embeddings = ggml_concat(ctx0, embeddings, model.mm_eoi, 1); // EOI
     }
 }
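For readers skimming the diff: the two ggml_concat calls above simply sandwich the image embeddings between the (now renamed) BOI and EOI embedding vectors along the token dimension. Below is a minimal sketch of that idea, assuming embeddings of shape [n_embd, n_tokens] and BOI/EOI vectors of shape [n_embd, 1]; the helper name is hypothetical and not part of clip.cpp:

    // Sketch only: prepend BOI and append EOI along the token axis (ggml dim 1).
    // Assumed shapes: embeddings [n_embd, n_tokens], mm_boi / mm_eoi [n_embd, 1].
    static ggml_tensor * wrap_with_boi_eoi(ggml_context * ctx0,
                                           ggml_tensor  * embeddings,
                                           ggml_tensor  * mm_boi,
                                           ggml_tensor  * mm_eoi) {
        embeddings = ggml_concat(ctx0, mm_boi, embeddings, 1);  // -> [n_embd, n_tokens + 1]
        embeddings = ggml_concat(ctx0, embeddings, mm_eoi, 1);  // -> [n_embd, n_tokens + 2]
        return embeddings;
    }

This is also why clip_n_output_tokens (later in this diff) adds 2 to n_patches when the BOI/EOI tensors are present.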

@@ -2797,8 +2795,8 @@ struct clip_model_loader {
         model.mm_model_mlp_1_w = get_tensor(string_format(TN_GLM_ADAPTER_D_H_2_4H, "weight"));
         model.mm_model_mlp_2_w = get_tensor(string_format(TN_GLM_ADAPTER_GATE, "weight"));
         model.mm_model_mlp_3_w = get_tensor(string_format(TN_GLM_ADAPTER_D_4H_2_H, "weight"));
-        model.mm_glm_tok_boi = get_tensor(string_format(TN_TOK_GLM_BOI, "weight"));
-        model.mm_glm_tok_eoi = get_tensor(string_format(TN_TOK_GLM_EOI, "weight"));
+        model.mm_boi = get_tensor(string_format(TN_TOK_GLM_BOI, "weight"));
+        model.mm_eoi = get_tensor(string_format(TN_TOK_GLM_EOI, "weight"));
     } break;
 case PROJECTOR_TYPE_QWEN2VL:
 case PROJECTOR_TYPE_QWEN25VL:
@@ -2894,14 +2892,14 @@ struct clip_model_loader {
     } break;
 case PROJECTOR_TYPE_COGVLM:
     {
-        model.mm_model_proj = get_tensor(TN_MM_PROJECTOR);
+        model.mm_model_proj = get_tensor(TN_MM_PROJECTOR);
         model.mm_post_fc_norm_w = get_tensor(string_format(TN_MM_POST_FC_NORM, "weight"));
         model.mm_post_fc_norm_b = get_tensor(string_format(TN_MM_POST_FC_NORM, "bias"));
-        model.mm_h_to_4h_w = get_tensor(string_format(TN_MM_H_TO_4H, "weight"));
-        model.mm_gate_w = get_tensor(string_format(TN_MM_GATE, "weight"));
-        model.mm_4h_to_h_w = get_tensor(string_format(TN_MM_4H_TO_H, "weight"));
-        model.mm_boi = get_tensor(TN_TOK_BOI);
-        model.mm_eoi = get_tensor(TN_TOK_EOI);
+        model.mm_h_to_4h_w = get_tensor(string_format(TN_MM_H_TO_4H, "weight"));
+        model.mm_gate_w = get_tensor(string_format(TN_MM_GATE, "weight"));
+        model.mm_4h_to_h_w = get_tensor(string_format(TN_MM_4H_TO_H, "weight"));
+        model.mm_boi = get_tensor(TN_TOK_BOI);
+        model.mm_eoi = get_tensor(TN_TOK_EOI);
     } break;
 default:
     GGML_ASSERT(false && "unknown projector type");
@@ -3951,7 +3949,7 @@ int clip_n_output_tokens(const struct clip_ctx * ctx, struct clip_image_f32 * im
 case PROJECTOR_TYPE_GLM_EDGE:
     {
         n_patches /= 4;
-        if (ctx->model.mm_glm_tok_boi) {
+        if (ctx->model.mm_boi) {
             n_patches += 2; // for BOI and EOI token embeddings
         }
     } break;
@@ -4043,7 +4041,7 @@ int clip_n_output_tokens(const struct clip_ctx * ctx, struct clip_image_f32 * im
     } break;
 case PROJECTOR_TYPE_COGVLM:
     {
-        n_patches += 2;
+        n_patches += 2; // for BOI and EOI token embeddings
     } break;
 default:
     GGML_ABORT("unsupported projector type");
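Taken together, the last two hunks leave the token counting for these two projectors as sketched below. This is a simplified reconstruction of just the switch cases touched by this commit, not the complete clip_n_output_tokens function; surrounding cases are omitted:

    // Simplified sketch of the projector cases touched by this commit.
    // n_patches starts out as the raw ViT patch count for the image.
    switch (proj) {
        case PROJECTOR_TYPE_GLM_EDGE:
            {
                n_patches /= 4;              // the GLM-Edge adapter reduces the patch count 4x
                if (ctx->model.mm_boi) {
                    n_patches += 2;          // BOI and EOI token embeddings
                }
            } break;
        case PROJECTOR_TYPE_COGVLM:
            {
                n_patches += 2;              // BOI and EOI token embeddings, always added
            } break;
        default:
            GGML_ABORT("unsupported projector type");
    }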
