@@ -786,8 +786,6 @@ struct clip_graph {
786786 // residual 2
787787 cur = ggml_add (ctx0, cur, inpFF);
788788 cb (cur, " layer_out" , il);
789-
790- return cur; // B, 1024, 16, 16
791789 }
792790
793791 cur = ggml_cont (ctx0, ggml_permute (ctx0, inpL, 2 , 0 , 1 , 3 ));
@@ -1538,12 +1536,17 @@ struct clip_graph {
15381536 ggml_tensor * build_dp_ocr_clip (ggml_tensor * inpL, ggml_tensor * patch_embeds) {
15391537 GGML_ASSERT (model.class_embedding != nullptr );
15401538 GGML_ASSERT (model.position_embeddings != nullptr );
1541- auto n_embd_vit_clip = 1024 ;
15421539
15431540 const int n_pos = n_patches + 1 ;
15441541 ggml_tensor * inp =
15451542 ggml_cont_3d (ctx0, ggml_dup_tensor (ctx0, patch_embeds), patch_embeds->ne [0 ], n_patches_x, n_patches_y);
1546- // ggml_tensor * inp = ggml_cpy(ctx0, inpL, ggml_dup_tensor(ctx0, inpL));
1543+
1544+ auto inp_n_elems = ggml_nelements (inp);
1545+ GGML_ASSERT (inp_n_elems == inp->ne [0 ] * inp->ne [1 ] * inp->ne [2 ]);
1546+ inp = ggml_permute (ctx0, inp, 2 , 1 ,0 ,3 ); // [n_patches, n_embd]
1547+ inp = ggml_cont (ctx0, inp);
1548+ GGML_ASSERT (ggml_nelements (inp) == n_patches_x*patch_size*4 *768 );
1549+ inp= ggml_reshape_2d (ctx0,inp,n_patches_x*patch_size, 4 *768 );
15471550
15481551 // add CLS token
15491552 inp = ggml_concat (ctx0, inp, model.class_embedding , 1 );
0 commit comments