@@ -508,13 +508,13 @@ struct clip_graph {
508508 const int patches_per_image = n_patches_x;
509509 const int kernel_size = hparams.proj_scale_factor ;
510510
511- cur = ggml_cont (ctx0, ggml_transpose (ctx0, cur) );
512- cur = ggml_reshape_4d (ctx0, cur, patches_per_image, patches_per_image, n_embd, batch_size);
511+ cur = ggml_transpose (ctx0, cur);
512+ cur = ggml_cont_4d (ctx0, cur, patches_per_image, patches_per_image, n_embd, batch_size);
513513
514514 // doing a pool2d to reduce the number of output tokens
515515 cur = ggml_pool_2d (ctx0, cur, GGML_OP_POOL_AVG, kernel_size, kernel_size, kernel_size, kernel_size, 0 , 0 );
516516 cur = ggml_reshape_3d (ctx0, cur, cur->ne [0 ] * cur->ne [0 ], n_embd, batch_size);
517- cur = ggml_cont (ctx0, ggml_transpose (ctx0, cur) );
517+ cur = ggml_transpose (ctx0, cur);
518518
519519 // apply norm before projection
520520 cur = ggml_rms_norm (ctx0, cur, eps);
@@ -537,13 +537,13 @@ struct clip_graph {
537537 GGML_ASSERT (scale_factor != 0 );
538538 cur = ggml_reshape_4d (ctx0, cur, n_embd * scale_factor, width / scale_factor, height, bsz);
539539 cur = ggml_permute (ctx0, cur, 0 , 2 , 1 , 3 );
540- cur = ggml_reshape_4d (ctx0, ggml_cont (ctx0, cur) ,
540+ cur = ggml_cont_4d (ctx0, cur,
541541 n_embd * scale_factor * scale_factor,
542542 height / scale_factor,
543543 width / scale_factor,
544544 bsz);
545545 cur = ggml_permute (ctx0, cur, 0 , 2 , 1 , 3 );
546- cur = ggml_reshape_3d (ctx0, ggml_cont (ctx0, cur) ,
546+ cur = ggml_cont_3d (ctx0, cur,
547547 n_embd * scale_factor * scale_factor,
548548 seq / (scale_factor * scale_factor),
549549 bsz);
@@ -570,13 +570,13 @@ struct clip_graph {
570570
571571 // unshuffle h
572572 cur = ggml_reshape_3d (ctx0, cur, n_embd * scale_factor, width / scale_factor, height);
573- cur = ggml_cont (ctx0, ggml_permute (ctx0, cur, 0 , 2 , 1 , 3 ) );
573+ cur = ggml_permute (ctx0, cur, 0 , 2 , 1 , 3 );
574574
575575 // unshuffle w
576- cur = ggml_reshape_3d (ctx0, cur, n_embd * scale_factor * scale_factor, height / scale_factor, width / scale_factor);
577- cur = ggml_cont (ctx0, ggml_permute (ctx0, cur, 0 , 2 , 1 , 3 ) );
576+ cur = ggml_cont_3d (ctx0, cur, n_embd * scale_factor * scale_factor, height / scale_factor, width / scale_factor);
577+ cur = ggml_permute (ctx0, cur, 0 , 2 , 1 , 3 );
578578
579- cur = ggml_reshape_2d (ctx0, cur, cur->ne [0 ], cur->ne [1 ] * cur->ne [2 ]);
579+ cur = ggml_cont_2d (ctx0, cur, cur->ne [0 ], cur->ne [1 ] * cur->ne [2 ]);
580580
581581 // projection
582582 cur = ggml_norm (ctx0, cur, 1e-5 ); // default nn.LayerNorm
@@ -715,15 +715,15 @@ struct clip_graph {
715715 auto inp_1 = ggml_conv_2d (ctx0, model.patch_embeddings_1 , inp_raw, patch_size, patch_size, 0 , 0 , 1 , 1 );
716716 inp = ggml_add (ctx0, inp, inp_1);
717717
718- inp = ggml_cont (ctx0, ggml_permute (ctx0, inp, 1 , 2 , 0 , 3 ) ); // [w, h, c, b] -> [c, w, h, b]
719- inp = ggml_reshape_4d (
718+ inp = ggml_permute (ctx0, inp, 1 , 2 , 0 , 3 ); // [w, h, c, b] -> [c, w, h, b]
719+ inp = ggml_cont_4d (
720720 ctx0, inp,
721721 n_embd * 2 , n_patches_x / 2 , n_patches_y, batch_size);
722722 inp = ggml_reshape_4d (
723723 ctx0, inp,
724724 n_embd * 2 , n_patches_x / 2 , 2 , batch_size * (n_patches_y / 2 ));
725- inp = ggml_cont (ctx0, ggml_permute (ctx0, inp, 0 , 2 , 1 , 3 ) );
726- inp = ggml_reshape_3d (
725+ inp = ggml_permute (ctx0, inp, 0 , 2 , 1 , 3 );
726+ inp = ggml_cont_3d (
727727 ctx0, inp,
728728 n_embd, n_patches_x * n_patches_y, batch_size);
729729 }
@@ -988,14 +988,14 @@ struct clip_graph {
988988 GGML_ASSERT (scale_factor > 0 );
989989 cur = ggml_reshape_4d (ctx0, cur, n_embd * scale_factor, height / scale_factor, width, bsz);
990990 cur = ggml_permute (ctx0, cur, 0 , 2 , 1 , 3 );
991- cur = ggml_reshape_4d (ctx0, ggml_cont (ctx0, cur) ,
991+ cur = ggml_cont_4d (ctx0, cur,
992992 n_embd * scale_factor * scale_factor,
993993 height / scale_factor,
994994 width / scale_factor,
995995 bsz);
996996 cur = ggml_permute (ctx0, cur, 0 , 2 , 1 , 3 );
997997 // flatten to 2D
998- cur = ggml_reshape_2d (ctx0, ggml_cont (ctx0, cur) ,
998+ cur = ggml_cont_2d (ctx0, cur,
999999 n_embd * scale_factor * scale_factor,
10001000 cur->ne [1 ] * cur->ne [2 ]);
10011001 }
@@ -1081,14 +1081,14 @@ struct clip_graph {
10811081 n_patches_y,
10821082 bsz);
10831083 cur = ggml_permute (ctx0, cur, 0 , 2 , 1 , 3 );
1084- cur = ggml_reshape_4d (ctx0, ggml_cont (ctx0, cur) ,
1084+ cur = ggml_cont_4d (ctx0, cur,
10851085 n_embd * scale_factor * scale_factor,
10861086 n_patches_x / scale_factor,
10871087 n_patches_y / scale_factor,
10881088 bsz);
10891089 cur = ggml_permute (ctx0, cur, 0 , 2 , 1 , 3 );
10901090 // flatten to 2D
1091- cur = ggml_reshape_2d (ctx0, ggml_cont (ctx0, cur) ,
1091+ cur = ggml_cont_2d (ctx0, cur,
10921092 n_embd * scale_factor * scale_factor,
10931093 n_patches / scale_factor / scale_factor);
10941094 cb (cur, " pixel_shuffle" , -1 );
@@ -1321,18 +1321,18 @@ struct clip_graph {
13211321 ggml_tensor * block_1 = nullptr ;
13221322 {
13231323 // transpose from [1, 576, 2048] --> [1, 2048, 576] --> [1, 2048, 24, 24]
1324- mlp_3 = ggml_cont (ctx0, ggml_permute (ctx0, mlp_3, 1 , 0 , 2 , 3 ) );
1325- mlp_3 = ggml_reshape_4d (ctx0, mlp_3, n_patch, n_patch, mlp_3->ne [1 ], mlp_3->ne [2 ]);
1324+ mlp_3 = ggml_permute (ctx0, mlp_3, 1 , 0 , 2 , 3 );
1325+ mlp_3 = ggml_cont_4d (ctx0, mlp_3, n_patch, n_patch, mlp_3->ne [1 ], mlp_3->ne [2 ]);
13261326 // stride = 1, padding = 1, bias is nullptr
13271327 block_1 = ggml_conv_2d_dw (ctx0, model.mm_model_block_1_block_0_0_w , mlp_3, 1 , 1 , 1 , 1 , 1 , 1 );
13281328
13291329 // layer norm
13301330 // // block_1 shape = [1, 2048, 24, 24], ne = [24, 24, 2048, 1]
1331- block_1 = ggml_cont (ctx0, ggml_permute (ctx0, block_1, 1 , 2 , 0 , 3 ) );
1331+ block_1 = ggml_permute (ctx0, block_1, 1 , 2 , 0 , 3 );
13321332 // block_1 shape = [1, 24, 24, 2048], ne = [2048, 24, 24, 1]
13331333 block_1 = ggml_norm (ctx0, block_1, eps);
13341334 block_1 = ggml_add (ctx0, ggml_mul (ctx0, block_1, model.mm_model_block_1_block_0_1_w ), model.mm_model_block_1_block_0_1_b );
1335- block_1 = ggml_cont (ctx0, ggml_permute (ctx0, block_1, 2 , 0 , 1 , 3 ) );
1335+ block_1 = ggml_permute (ctx0, block_1, 2 , 0 , 1 , 3 );
13361336
13371337 // block_1 shape = [1, 2048, 24, 24], ne = [24, 24, 2048, 1]
13381338 // hardswish
@@ -1376,11 +1376,11 @@ struct clip_graph {
13761376
13771377 // block_1 shape = [1, 2048, 12, 12], ne = [12, 12, 2048, 1]
13781378 // layer norm
1379- block_1 = ggml_cont (ctx0, ggml_permute (ctx0, block_1, 1 , 2 , 0 , 3 ) );
1379+ block_1 = ggml_permute (ctx0, block_1, 1 , 2 , 0 , 3 );
13801380 // block_1 shape = [1, 12, 12, 2048], ne = [2048, 12, 12, 1]
13811381 block_1 = ggml_norm (ctx0, block_1, eps);
13821382 block_1 = ggml_add (ctx0, ggml_mul (ctx0, block_1, model.mm_model_block_2_block_0_1_w ), model.mm_model_block_2_block_0_1_b );
1383- block_1 = ggml_cont (ctx0, ggml_permute (ctx0, block_1, 2 , 0 , 1 , 3 ) );
1383+ block_1 = ggml_permute (ctx0, block_1, 2 , 0 , 1 , 3 );
13841384 // block_1 shape = [1, 2048, 12, 12], ne = [12, 12, 2048, 1]
13851385 // hardswish
13861386 ggml_tensor * block_1_hw = ggml_hardswish (ctx0, block_1);
@@ -1427,9 +1427,9 @@ struct clip_graph {
14271427 mlp_2 = ggml_add (ctx0, mlp_2, model.mm_model_mlp_2_b );
14281428 // mlp_2 ne = [2048, 576, 1, 1]
14291429 // // AVG Pool Layer 2*2, strides = 2
1430- mlp_2 = ggml_cont (ctx0, ggml_permute (ctx0, mlp_2, 1 , 0 , 2 , 3 ) );
1430+ mlp_2 = ggml_permute (ctx0, mlp_2, 1 , 0 , 2 , 3 );
14311431 // mlp_2 ne = [576, 2048, 1, 1]
1432- mlp_2 = ggml_reshape_4d (ctx0, mlp_2, n_patch, n_patch, mlp_2->ne [1 ], mlp_2->ne [2 ]);
1432+ mlp_2 = ggml_cont_4d (ctx0, mlp_2, n_patch, n_patch, mlp_2->ne [1 ], mlp_2->ne [2 ]);
14331433 // mlp_2 ne [24, 24, 2048, 1]
14341434 mlp_2 = ggml_pool_2d (ctx0, mlp_2, GGML_OP_POOL_AVG, 2 , 2 , 2 , 2 , 0 , 0 );
14351435 // weight ne = [3, 3, 2048, 1]
@@ -1449,8 +1449,8 @@ struct clip_graph {
14491449 // glm projector
14501450 else if (ctx->proj_type () == PROJECTOR_TYPE_GLM_EDGE) {
14511451 size_t gridsz = (size_t )sqrt (embeddings->ne [1 ]);
1452- embeddings = ggml_cont (ctx0, ggml_permute (ctx0,embeddings,1 ,0 ,2 ,3 ) );
1453- embeddings = ggml_reshape_3d (ctx0, embeddings, gridsz, gridsz, embeddings->ne [1 ]);
1452+ embeddings = ggml_permute (ctx0,embeddings,1 ,0 ,2 ,3 );
1453+ embeddings = ggml_cont_3d (ctx0, embeddings, gridsz, gridsz, embeddings->ne [1 ]);
14541454 embeddings = ggml_conv_2d (ctx0, model.mm_model_adapter_conv_w , embeddings, 2 , 2 , 0 , 0 , 1 , 1 );
14551455 embeddings = ggml_reshape_3d (ctx0, embeddings,embeddings->ne [0 ]*embeddings->ne [1 ] , embeddings->ne [2 ], batch_size);
14561456 embeddings = ggml_cont (ctx0, ggml_permute (ctx0,embeddings, 1 , 0 , 2 , 3 ));
@@ -1750,7 +1750,9 @@ struct clip_graph {
17501750 cur = ggml_cont (ctx0, cur);
17511751 cur = ggml_pool_1d (ctx0, cur, GGML_OP_POOL_AVG, 2 , 2 , 0 );
17521752 cur = ggml_transpose (ctx0, cur);
1753- cur = ggml_cont (ctx0, cur);
1753+ if (!model.post_ln_w ) {
1754+ cur = ggml_cont (ctx0, cur);
1755+ }
17541756 inpL = cur;
17551757 }
17561758
@@ -2005,7 +2007,6 @@ struct clip_graph {
20052007 ggml_row_size (cur->type , n_dim),
20062008 ggml_row_size (cur->type , n_dim*n_head),
20072009 n_dim/2 * ggml_element_size (cur));
2008- second = ggml_cont (ctx0, second); // copy, because ggml_rope don't play well with non-contiguous tensors
20092010 second = ggml_rope_ext (
20102011 ctx0,
20112012 second,
0 commit comments