Commit 215bb4e

clean old code
Signed-off-by: tc-mb <[email protected]>
1 parent b67f081

2 files changed: +3 additions, -114 deletions
tools/mtmd/clip.cpp

Lines changed: 1 addition & 108 deletions
@@ -919,112 +919,6 @@ struct clip_graph {
         return gf;
     }
 
-    ggml_cgraph * build_minicpmv_embedding() {
-        GGML_ASSERT(model.class_embedding == nullptr);
-        const int n_pos = n_patches;
-
-        // for selecting learned pos embd, used by ViT
-        struct ggml_tensor * positions = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, n_pos);
-        ggml_set_name(positions, "positions");
-        ggml_set_input(positions);
-
-        ggml_tensor * learned_pos_embd = ggml_get_rows(ctx0, model.position_embeddings, positions);
-
-        ggml_tensor * inp = build_inp();
-        if (learned_pos_embd) {
-            inp = ggml_add(ctx0, inp, learned_pos_embd);
-            cb(inp, "pos_embed", -1);
-        }
-        ggml_tensor * embeddings = inp;
-
-        // pre-layernorm
-        if (model.pre_ln_w) {
-            embeddings = ggml_norm(ctx0, embeddings, eps);
-            ggml_set_name(embeddings, "pre_ln");
-            embeddings = ggml_add(ctx0, ggml_mul(ctx0, embeddings, model.pre_ln_w), model.pre_ln_b);
-        }
-
-        ggml_build_forward_expand(gf, embeddings);
-        return gf;
-    }
-
-    ggml_cgraph * build_minicpmv_resampler() {
-        const int batch_size = 1;
-
-        GGML_ASSERT(model.class_embedding == nullptr);
-        const int n_pos = n_patches;
-
-        const int image_size_width = img.nx;
-        const int image_size_height = img.ny;
-        const int patch_size = hparams.patch_size;
-        const int num_patches = ((image_size_width / patch_size) * (image_size_height / patch_size));
-
-        // position embeddings for the projector (not for ViT)
-        int n_output_dim = clip_n_mmproj_embd(ctx);
-        ggml_tensor * pos_embed = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_output_dim, n_pos, batch_size);
-        ggml_set_name(pos_embed, "pos_embed");
-        ggml_set_input(pos_embed);
-
-        struct ggml_tensor * embeddings = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, 1152, num_patches);
-        ggml_set_name(embeddings, "embeddings");
-        ggml_set_input(embeddings);
-
-        // resampler projector (it is just another transformer)
-
-        ggml_tensor * q = model.mm_model_query;
-        ggml_tensor * v = ggml_mul_mat(ctx0, model.mm_model_kv_proj, embeddings);
-
-        // norm
-        q = build_norm(q, model.mm_model_ln_q_w, model.mm_model_ln_q_b, NORM_TYPE_NORMAL, eps, -1);
-        v = build_norm(v, model.mm_model_ln_kv_w, model.mm_model_ln_kv_b, NORM_TYPE_NORMAL, eps, -1);
-
-        // k = v + pos_embed
-        ggml_tensor * k = ggml_add(ctx0, v, pos_embed);
-
-        // attention
-        {
-            int n_embd = clip_n_mmproj_embd(ctx);
-            const int d_head = 128;
-            int n_head = n_embd/d_head;
-            // Use actual config value if available, otherwise fall back to hardcoded values
-            int num_query = ctx->model.hparams.minicpmv_query_num;
-
-            ggml_tensor * Q = ggml_add(ctx0,
-                ggml_mul_mat(ctx0, model.mm_model_attn_q_w, q),
-                model.mm_model_attn_q_b);
-            ggml_tensor * K = ggml_add(ctx0,
-                ggml_mul_mat(ctx0, model.mm_model_attn_k_w, k),
-                model.mm_model_attn_k_b);
-            ggml_tensor * V = ggml_add(ctx0,
-                ggml_mul_mat(ctx0, model.mm_model_attn_v_w, v),
-                model.mm_model_attn_v_b);
-
-            Q = ggml_reshape_3d(ctx0, Q, d_head, n_head, num_query);
-            K = ggml_reshape_3d(ctx0, K, d_head, n_head, n_pos);
-            V = ggml_reshape_3d(ctx0, V, d_head, n_head, n_pos);
-
-            cb(Q, "resampler_Q", -1);
-            cb(K, "resampler_K", -1);
-            cb(V, "resampler_V", -1);
-
-            embeddings = build_attn(
-                model.mm_model_attn_o_w,
-                model.mm_model_attn_o_b,
-                Q, K, V, nullptr, kq_scale, -1);
-            cb(embeddings, "resampler_attn_out", -1);
-        }
-        // layernorm
-        embeddings = build_norm(embeddings, model.mm_model_ln_post_w, model.mm_model_ln_post_b, NORM_TYPE_NORMAL, eps, -1);
-
-        // projection
-        embeddings = ggml_mul_mat(ctx0, model.mm_model_proj, embeddings);
-
-        // build the graph
-        ggml_build_forward_expand(gf, embeddings);
-
-        return gf;
-    }
-
     ggml_cgraph * build_internvl() {
         GGML_ASSERT(model.class_embedding != nullptr);
         GGML_ASSERT(model.position_embeddings != nullptr);
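The two builders removed above appear to be leftover dead code (the commit message is just "clean old code"). The more interesting of them, `build_minicpmv_resampler()`, is a Perceiver-style resampler: a fixed set of learned query vectors cross-attends over the variable-length patch embeddings, with position information added only on the key side (`k = v + pos_embed`). Below is a minimal single-head NumPy sketch of that attention pattern; all sizes are illustrative, and the multi-head split, layernorms, biases, and output projection of the real graph are omitted.

```python
# Single-head sketch of the removed resampler's cross-attention.
# Sizes are illustrative; the real graph is multi-head with d_head = 128.
import numpy as np

num_query, n_pos, n_embd = 64, 1024, 4096     # queries, patches, embedding dim

rng = np.random.default_rng(0)
q = rng.standard_normal((num_query, n_embd))  # learned queries (mm_model_query)
v = rng.standard_normal((n_pos, n_embd))      # projected patch embeddings
pos_embed = rng.standard_normal((n_pos, n_embd))

k = v + pos_embed                             # keys carry position; values do not

scores = q @ k.T / np.sqrt(n_embd)            # (num_query, n_pos)
scores -= scores.max(axis=-1, keepdims=True)  # numerical stability
attn = np.exp(scores)
attn /= attn.sum(axis=-1, keepdims=True)      # softmax over patches

out = attn @ v                                # (num_query, n_embd)
```

However many patches the image produces, `out` always has `num_query` rows, which is what lets the model hand the LLM a fixed number of image tokens per slice.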
@@ -3371,10 +3265,9 @@ struct llava_uhd {
        const int original_width = original_size.width;
        const int original_height = original_size.height;
 
-        bool has_slices = original_size.width > slice_size || original_size.height > slice_size;
+        const bool has_slices = original_size.width > slice_size || original_size.height > slice_size;
         const bool has_pinpoints = !ctx->model.hparams.image_res_candidates.empty();
 
-        // has_slices = false;
         if (!has_slices) {
             // skip slicing logic
             res.overview_size = clip_image_size{slice_size, slice_size};
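For reference, the predicate touched in this hunk decides whether `llava_uhd` slices at all: only when either dimension exceeds `slice_size`; otherwise the image is resized to a single `slice_size` x `slice_size` overview. A tiny self-contained sketch of that decision (`needs_slicing` is a hypothetical name; the actual slicing branch computes a grid of slices and is not reproduced here):

```python
# Sketch of the has_slices predicate from the hunk above; the slicing
# branch itself (grid layout, refined slice sizes) is omitted.
def needs_slicing(width: int, height: int, slice_size: int) -> bool:
    return width > slice_size or height > slice_size

assert not needs_slicing(448, 336, slice_size=448)  # small image: overview only
assert needs_slicing(1344, 896, slice_size=448)     # large image: sliced
```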

tools/mtmd/legacy-models/minicpmv-convert-image-encoder-to-gguf.py

Lines changed: 2 additions & 6 deletions
@@ -831,17 +831,13 @@ def _replace_name(s, v):
     if re.match("vision_model.embeddings.position_embedding", s):
         v = v.unsqueeze(0)
         return {s: v}
-    print(s)
-    if "emb" in s:
-        return {s: v}
-    return None
+
+    return {s: v}
 
 state_dict = model.state_dict()
 new_state_dict = {}
 for k, v in state_dict.items():
     kvs = _replace_name(k, v)
-    if kvs is None:
-        continue
     for nk, nv in kvs.items():
         new_state_dict[nk] = nv
 state_dict = new_state_dict
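With this simplification, `_replace_name` always returns a `{name: tensor}` mapping, so the `None` guard in the conversion loop became dead code and was dropped along with the debug `print`. A self-contained sketch of the resulting pass, with toy tensors standing in for the real ViT `state_dict` (shapes are made up):

```python
# Toy reproduction of the simplified rename pass: only the position
# embedding gets an extra leading dim; everything else passes through.
import re
import torch

def _replace_name(s, v):
    if re.match("vision_model.embeddings.position_embedding", s):
        v = v.unsqueeze(0)
    return {s: v}  # always a dict now, never None

state_dict = {
    "vision_model.embeddings.position_embedding": torch.zeros(196, 1152),
    "vision_model.encoder.layers.0.mlp.fc1.weight": torch.zeros(4304, 1152),
}

new_state_dict = {}
for k, v in state_dict.items():
    for nk, nv in _replace_name(k, v).items():  # no None check needed
        new_state_dict[nk] = nv

assert new_state_dict["vision_model.embeddings.position_embedding"].shape == (1, 196, 1152)
```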
