Skip to content

Commit ccb2f23

Browse files
committed
mtmd: debug CLIP-L (vit_pre_ln)
1 parent a488b49 commit ccb2f23

File tree

1 file changed

+5
-6
lines changed

1 file changed

+5
-6
lines changed

tools/mtmd/clip.cpp

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1555,8 +1555,8 @@ struct clip_graph {
15551555
ggml_tensor * inp = ggml_cpy(ctx0, patch_embeds, ggml_dup_tensor(ctx0, patch_embeds));
15561556

15571557

1558-
inp = ggml_cont(ctx0,ggml_permute(ctx0, inp,2,1,0,3));
1559-
inp = ggml_reshape_2d(ctx0, inp, n_embd, inp->ne[1]*inp->ne[2]*inp->ne[3]);
1558+
inp = ggml_reshape_2d(ctx0, inp, inp->ne[0]*inp->ne[1], inp->ne[2]);
1559+
inp = ggml_cont(ctx0, ggml_permute(ctx0, inp, 1, 0, 2, 3));
15601560

15611561
ggml_tensor * new_pos_embd = ggml_cpy(ctx0, model.position_embeddings, ggml_dup_tensor(ctx0, model.position_embeddings));
15621562

@@ -1587,7 +1587,7 @@ struct clip_graph {
15871587

15881588

15891589
// add CLS token
1590-
inp = ggml_concat(ctx0, inp, model.class_embedding, 1);
1590+
inp = ggml_concat(ctx0, model.class_embedding, inp, 1);
15911591

15921592
//TODO : check norm type for dp-ocr-clip
15931593
norm_type norm_t = NORM_TYPE_NORMAL;
@@ -1596,7 +1596,6 @@ struct clip_graph {
15961596
ggml_tensor * positions = ggml_cast(ctx0, ggml_arange(ctx0, 0, n_pos, 1), GGML_TYPE_I32);
15971597
ggml_tensor * learned_pos_embd = ggml_get_rows(ctx0, new_pos_embd, positions);
15981598

1599-
16001599
ggml_tensor * cur = build_vit(inp, n_pos, norm_t, hparams.ffn_op, learned_pos_embd,
16011600
nullptr); // shape [1024, 16, 16]
16021601

@@ -2395,7 +2394,7 @@ struct clip_graph {
23952394
// pre-layernorm
23962395
if (model.pre_ln_w) {
23972396
inpL = build_norm(inpL, model.pre_ln_w, model.pre_ln_b, norm_t, eps, -1);
2398-
cb(inpL, "pre_ln", -1);
2397+
cb(inpL, "vit_pre_ln", -1);
23992398
}
24002399

24012400
// loop over layers
@@ -5808,7 +5807,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
58085807
bool is_stored = false;
58095808
std::vector<std::string> patterns = {
58105809
/* Add tensor names here to dump (e.g. "sam_output") */
5811-
"sam_output"
5810+
"vit_pre_ln"
58125811
};
58135812

58145813
for (auto & p : patterns) {

0 commit comments

Comments
 (0)