
Commit 583a3a3

remove unnecessary conts and merge reshapes
1 parent 21c17b5 commit 583a3a3
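
Two kinds of rewrite recur in the hunks below: ggml_cont calls are dropped outright where the consuming op accepts non-contiguous input (eliminating a copy), and ggml_cont + ggml_reshape_Nd pairs are fused into the single-node ggml_cont_2d/3d/4d helpers, which copy a (possibly non-contiguous) tensor straight into a contiguous buffer of the target shape. A minimal standalone sketch of the fusion, with illustrative sizes and no_alloc=true so only shapes are built (assumes a current ggml.h):

    #include "ggml.h"
    #include <cassert>

    int main() {
        ggml_init_params params = { /*mem_size*/ 16u*1024*1024, /*mem_buffer*/ nullptr, /*no_alloc*/ true };
        ggml_context * ctx0 = ggml_init(params);

        const int64_t n_altup = 4, n_tokens = 8;
        ggml_tensor * coefs = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_altup, n_tokens); // [n_altup, n_tokens]

        // before: materialize a contiguous copy, then reshape the copy (two nodes)
        ggml_tensor * a = ggml_cont(ctx0, ggml_transpose(ctx0, coefs)); // [n_tokens, n_altup]
        a = ggml_reshape_3d(ctx0, a, 1, n_tokens, n_altup);             // [1, n_tokens, n_altup]

        // after: copy straight into the target shape (one node)
        ggml_tensor * b = ggml_cont_3d(ctx0, ggml_transpose(ctx0, coefs), 1, n_tokens, n_altup);

        assert(a->ne[0] == b->ne[0] && a->ne[1] == b->ne[1] && a->ne[2] == b->ne[2]);
        ggml_free(ctx0);
        return 0;
    }

Note that the fusion saves a graph node per site, not a copy: reshape is a zero-copy view, so both variants perform exactly one copy. The dropped standalone conts are where actual copies disappear.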

File tree

2 files changed: +37 -38 lines
src/llama-model.cpp
tools/mtmd/clip.cpp

src/llama-model.cpp

Lines changed: 7 additions & 9 deletions
@@ -10864,8 +10864,8 @@ struct llm_build_gemma3n_iswa : public llm_graph_context {
         ggml_tensor * all_coefs = build_lora_mm(model.layers[il].altup_correct_coef, modalities); // [n_altup, n_tokens]
         all_coefs = ggml_scale_bias(ctx0, all_coefs, 1.0f, 1.0f); // + 1.0
         cb(all_coefs, "all_coefs", il);
-        all_coefs = ggml_cont(ctx0, ggml_transpose(ctx0, all_coefs)); // [n_tokens, n_altup]
-        all_coefs = ggml_reshape_3d(ctx0, all_coefs, 1, n_tokens, n_altup); // [1, n_tokens, n_altup]
+        all_coefs = ggml_transpose(ctx0, all_coefs); // [n_tokens, n_altup]
+        all_coefs = ggml_cont_3d(ctx0, all_coefs, 1, n_tokens, n_altup); // [1, n_tokens, n_altup]

         innovation = ggml_repeat_4d(ctx0, innovation, n_embd, n_tokens, n_altup, 1);
         ggml_tensor * corrected = ggml_mul(ctx0, innovation, all_coefs); // [n_embd, n_tokens, n_altup]
@@ -15769,7 +15769,7 @@ struct llm_build_wavtokenizer_dec : public llm_graph_context {
        };
    }

-    cur = ggml_cont(ctx0, ggml_transpose(ctx0, cur));
+    cur = ggml_transpose(ctx0, cur);

    cur = build_norm(cur,
            model.tok_norm,
@@ -15789,7 +15789,7 @@ struct llm_build_wavtokenizer_dec : public llm_graph_context {
        cur = ggml_conv_1d_dw_ph(ctx0, layer.dw, cur, 1, 1);
        cur = ggml_add(ctx0, cur, layer.dw_b);

-        cur = ggml_cont(ctx0, ggml_transpose(ctx0, cur));
+        cur = ggml_transpose(ctx0, cur);

        cur = build_norm(cur,
                layer.norm,
@@ -15812,7 +15812,7 @@ struct llm_build_wavtokenizer_dec : public llm_graph_context {

    cur = inpL;

-    cur = ggml_cont(ctx0, ggml_transpose(ctx0, cur));
+    cur = ggml_transpose(ctx0, cur);

    cur = build_norm(cur,
            model.output_norm,
@@ -16913,15 +16913,13 @@ struct llm_build_plamo2 : public llm_graph_context_mamba {
        cb(zx, "mamba_in_proj", il);
        // {8192, 5, 1, 1} -> {8192, 1, 5, 1}
        zx = ggml_permute(ctx0, zx, 0, 2, 1, 3);
-        zx = ggml_cont(ctx0, zx);
-        zx = ggml_reshape_4d(ctx0, zx, head_dim * 2, n_heads, n_seq_tokens, n_seqs);
+        zx = ggml_cont_4d(ctx0, zx, head_dim * 2, n_heads, n_seq_tokens, n_seqs);
        cb(zx, "mamba_in_proj_out", il);

        // split into z and x
        // => {head_dim * n_heads, n_seq_tokens, n_seqs}
        ggml_tensor * x = ggml_view_4d(ctx0, zx, head_dim, n_heads, n_seq_tokens, n_seqs, zx->nb[1], zx->nb[2], zx->nb[3], head_dim*ggml_element_size(zx));
-        x = ggml_cont(ctx0, x);
-        x = ggml_reshape_3d(ctx0, x, head_dim * n_heads, n_seq_tokens, n_seqs);
+        x = ggml_cont_3d(ctx0, x, head_dim * n_heads, n_seq_tokens, n_seqs);
        // x = ggml_permute(ctx0, x, 0, 2, 1, 3);
        cb(x, "mamba_x_split", il);
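The plamo2 hunk is the one spot where the fused input is a strided ggml_view_4d (the x half of the packed zx tensor) rather than a transpose or permute; ggml_cont_3d materializes the view and flattens the head dimension in the same node. A sketch of that shape flow under illustrative sizes (a plain ggml_reshape_3d would assert here, since the view is not contiguous):

    #include "ggml.h"

    int main() {
        ggml_init_params params = { 16u*1024*1024, nullptr, /*no_alloc*/ true };
        ggml_context * ctx0 = ggml_init(params);

        // illustrative sizes; the real values come from the plamo2 hparams
        const int64_t head_dim = 64, n_heads = 4, n_seq_tokens = 5, n_seqs = 1;
        ggml_tensor * zx = ggml_new_tensor_4d(ctx0, GGML_TYPE_F32,
                                              head_dim * 2, n_heads, n_seq_tokens, n_seqs);

        // x is the second half of dim 0: keep the parent strides, offset past z
        ggml_tensor * x = ggml_view_4d(ctx0, zx, head_dim, n_heads, n_seq_tokens, n_seqs,
                                       zx->nb[1], zx->nb[2], zx->nb[3],
                                       head_dim * ggml_element_size(zx));

        // one node: copy the strided view out and flatten heads into dim 0
        x = ggml_cont_3d(ctx0, x, head_dim * n_heads, n_seq_tokens, n_seqs);

        ggml_free(ctx0);
        return 0;
    }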

tools/mtmd/clip.cpp

Lines changed: 30 additions & 29 deletions
@@ -508,13 +508,13 @@ struct clip_graph {
        const int patches_per_image = n_patches_x;
        const int kernel_size = hparams.proj_scale_factor;

-        cur = ggml_cont(ctx0, ggml_transpose(ctx0, cur));
-        cur = ggml_reshape_4d(ctx0, cur, patches_per_image, patches_per_image, n_embd, batch_size);
+        cur = ggml_transpose(ctx0, cur);
+        cur = ggml_cont_4d(ctx0, cur, patches_per_image, patches_per_image, n_embd, batch_size);

        // doing a pool2d to reduce the number of output tokens
        cur = ggml_pool_2d(ctx0, cur, GGML_OP_POOL_AVG, kernel_size, kernel_size, kernel_size, kernel_size, 0, 0);
        cur = ggml_reshape_3d(ctx0, cur, cur->ne[0] * cur->ne[0], n_embd, batch_size);
-        cur = ggml_cont(ctx0, ggml_transpose(ctx0, cur));
+        cur = ggml_transpose(ctx0, cur);

        // apply norm before projection
        cur = ggml_rms_norm(ctx0, cur, eps);
@@ -537,13 +537,13 @@ struct clip_graph {
        GGML_ASSERT(scale_factor != 0);
        cur = ggml_reshape_4d(ctx0, cur, n_embd * scale_factor, width / scale_factor, height, bsz);
        cur = ggml_permute(ctx0, cur, 0, 2, 1, 3);
-        cur = ggml_reshape_4d(ctx0, ggml_cont(ctx0, cur),
+        cur = ggml_cont_4d(ctx0, cur,
            n_embd * scale_factor * scale_factor,
            height / scale_factor,
            width / scale_factor,
            bsz);
        cur = ggml_permute(ctx0, cur, 0, 2, 1, 3);
-        cur = ggml_reshape_3d(ctx0, ggml_cont(ctx0, cur),
+        cur = ggml_cont_3d(ctx0, cur,
            n_embd * scale_factor * scale_factor,
            seq / (scale_factor * scale_factor),
            bsz);
@@ -570,13 +570,13 @@ struct clip_graph {

        // unshuffle h
        cur = ggml_reshape_3d(ctx0, cur, n_embd * scale_factor, width / scale_factor, height);
-        cur = ggml_cont(ctx0, ggml_permute(ctx0, cur, 0, 2, 1, 3));
+        cur = ggml_permute(ctx0, cur, 0, 2, 1, 3);

        // unshuffle w
-        cur = ggml_reshape_3d(ctx0, cur, n_embd * scale_factor * scale_factor, height / scale_factor, width / scale_factor);
-        cur = ggml_cont(ctx0, ggml_permute(ctx0, cur, 0, 2, 1, 3));
+        cur = ggml_cont_3d(ctx0, cur, n_embd * scale_factor * scale_factor, height / scale_factor, width / scale_factor);
+        cur = ggml_permute(ctx0, cur, 0, 2, 1, 3);

-        cur = ggml_reshape_2d(ctx0, cur, cur->ne[0], cur->ne[1] * cur->ne[2]);
+        cur = ggml_cont_2d(ctx0, cur, cur->ne[0], cur->ne[1] * cur->ne[2]);

        // projection
        cur = ggml_norm(ctx0, cur, 1e-5); // default nn.LayerNorm
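One detail in this hunk goes beyond dropping a cont: the final flatten switches from ggml_reshape_2d to ggml_cont_2d. With the intermediate copies gone, cur reaches the flatten as a permuted (non-contiguous) view, and reshape in ggml is a zero-copy view that requires a contiguous source, so the flatten is now where the single copy happens. Side by side, reusing the hunk's own names:

    // source already contiguous: reshape is a free view
    cur = ggml_reshape_2d(ctx0, cur, cur->ne[0], cur->ne[1] * cur->ne[2]);
    // source is a permuted view: cont_2d does the copy and the flatten together
    cur = ggml_cont_2d(ctx0, cur, cur->ne[0], cur->ne[1] * cur->ne[2]);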
@@ -715,15 +715,15 @@ struct clip_graph {
        auto inp_1 = ggml_conv_2d(ctx0, model.patch_embeddings_1, inp_raw, patch_size, patch_size, 0, 0, 1, 1);
        inp = ggml_add(ctx0, inp, inp_1);

-        inp = ggml_cont(ctx0, ggml_permute(ctx0, inp, 1, 2, 0, 3)); // [w, h, c, b] -> [c, w, h, b]
-        inp = ggml_reshape_4d(
+        inp = ggml_permute(ctx0, inp, 1, 2, 0, 3); // [w, h, c, b] -> [c, w, h, b]
+        inp = ggml_cont_4d(
            ctx0, inp,
            n_embd * 2, n_patches_x / 2, n_patches_y, batch_size);
        inp = ggml_reshape_4d(
            ctx0, inp,
            n_embd * 2, n_patches_x / 2, 2, batch_size * (n_patches_y / 2));
-        inp = ggml_cont(ctx0, ggml_permute(ctx0, inp, 0, 2, 1, 3));
-        inp = ggml_reshape_3d(
+        inp = ggml_permute(ctx0, inp, 0, 2, 1, 3);
+        inp = ggml_cont_3d(
            ctx0, inp,
            n_embd, n_patches_x * n_patches_y, batch_size);
    }
@@ -988,14 +988,14 @@ struct clip_graph {
        GGML_ASSERT(scale_factor > 0);
        cur = ggml_reshape_4d(ctx0, cur, n_embd * scale_factor, height / scale_factor, width, bsz);
        cur = ggml_permute(ctx0, cur, 0, 2, 1, 3);
-        cur = ggml_reshape_4d(ctx0, ggml_cont(ctx0, cur),
+        cur = ggml_cont_4d(ctx0, cur,
            n_embd * scale_factor * scale_factor,
            height / scale_factor,
            width / scale_factor,
            bsz);
        cur = ggml_permute(ctx0, cur, 0, 2, 1, 3);
        // flatten to 2D
-        cur = ggml_reshape_2d(ctx0, ggml_cont(ctx0, cur),
+        cur = ggml_cont_2d(ctx0, cur,
            n_embd * scale_factor * scale_factor,
            cur->ne[1] * cur->ne[2]);
    }
@@ -1081,14 +1081,14 @@ struct clip_graph {
            n_patches_y,
            bsz);
        cur = ggml_permute(ctx0, cur, 0, 2, 1, 3);
-        cur = ggml_reshape_4d(ctx0, ggml_cont(ctx0, cur),
+        cur = ggml_cont_4d(ctx0, cur,
            n_embd * scale_factor * scale_factor,
            n_patches_x / scale_factor,
            n_patches_y / scale_factor,
            bsz);
        cur = ggml_permute(ctx0, cur, 0, 2, 1, 3);
        // flatten to 2D
-        cur = ggml_reshape_2d(ctx0, ggml_cont(ctx0, cur),
+        cur = ggml_cont_2d(ctx0, cur,
            n_embd * scale_factor * scale_factor,
            n_patches / scale_factor / scale_factor);
        cb(cur, "pixel_shuffle", -1);
@@ -1321,18 +1321,18 @@ struct clip_graph {
        ggml_tensor * block_1 = nullptr;
        {
            // transpose from [1, 576, 2048] --> [1, 2048, 576] --> [1, 2048, 24, 24]
-            mlp_3 = ggml_cont(ctx0, ggml_permute(ctx0, mlp_3, 1, 0, 2, 3));
-            mlp_3 = ggml_reshape_4d(ctx0, mlp_3, n_patch, n_patch, mlp_3->ne[1], mlp_3->ne[2]);
+            mlp_3 = ggml_permute(ctx0, mlp_3, 1, 0, 2, 3);
+            mlp_3 = ggml_cont_4d(ctx0, mlp_3, n_patch, n_patch, mlp_3->ne[1], mlp_3->ne[2]);
            // stride = 1, padding = 1, bias is nullptr
            block_1 = ggml_conv_2d_dw(ctx0, model.mm_model_block_1_block_0_0_w, mlp_3, 1, 1, 1, 1, 1, 1);

            // layer norm
            // // block_1 shape = [1, 2048, 24, 24], ne = [24, 24, 2048, 1]
-            block_1 = ggml_cont(ctx0, ggml_permute(ctx0, block_1, 1, 2, 0, 3));
+            block_1 = ggml_permute(ctx0, block_1, 1, 2, 0, 3);
            // block_1 shape = [1, 24, 24, 2048], ne = [2048, 24, 24, 1]
            block_1 = ggml_norm(ctx0, block_1, eps);
            block_1 = ggml_add(ctx0, ggml_mul(ctx0, block_1, model.mm_model_block_1_block_0_1_w), model.mm_model_block_1_block_0_1_b);
-            block_1 = ggml_cont(ctx0, ggml_permute(ctx0, block_1, 2, 0, 1, 3));
+            block_1 = ggml_permute(ctx0, block_1, 2, 0, 1, 3);

            // block_1 shape = [1, 2048, 24, 24], ne = [24, 24, 2048, 1]
            // hardswish
@@ -1376,11 +1376,11 @@ struct clip_graph {

            // block_1 shape = [1, 2048, 12, 12], ne = [12, 12, 2048, 1]
            // layer norm
-            block_1 = ggml_cont(ctx0, ggml_permute(ctx0, block_1, 1, 2, 0, 3));
+            block_1 = ggml_permute(ctx0, block_1, 1, 2, 0, 3);
            // block_1 shape = [1, 12, 12, 2048], ne = [2048, 12, 12, 1]
            block_1 = ggml_norm(ctx0, block_1, eps);
            block_1 = ggml_add(ctx0, ggml_mul(ctx0, block_1, model.mm_model_block_2_block_0_1_w), model.mm_model_block_2_block_0_1_b);
-            block_1 = ggml_cont(ctx0, ggml_permute(ctx0, block_1, 2, 0, 1, 3));
+            block_1 = ggml_permute(ctx0, block_1, 2, 0, 1, 3);
            // block_1 shape = [1, 2048, 12, 12], ne = [12, 12, 2048, 1]
            // hardswish
            ggml_tensor * block_1_hw = ggml_hardswish(ctx0, block_1);
@@ -1427,9 +1427,9 @@ struct clip_graph {
        mlp_2 = ggml_add(ctx0, mlp_2, model.mm_model_mlp_2_b);
        // mlp_2 ne = [2048, 576, 1, 1]
        // // AVG Pool Layer 2*2, strides = 2
-        mlp_2 = ggml_cont(ctx0, ggml_permute(ctx0, mlp_2, 1, 0, 2, 3));
+        mlp_2 = ggml_permute(ctx0, mlp_2, 1, 0, 2, 3);
        // mlp_2 ne = [576, 2048, 1, 1]
-        mlp_2 = ggml_reshape_4d(ctx0, mlp_2, n_patch, n_patch, mlp_2->ne[1], mlp_2->ne[2]);
+        mlp_2 = ggml_cont_4d(ctx0, mlp_2, n_patch, n_patch, mlp_2->ne[1], mlp_2->ne[2]);
        // mlp_2 ne [24, 24, 2048, 1]
        mlp_2 = ggml_pool_2d(ctx0, mlp_2, GGML_OP_POOL_AVG, 2, 2, 2, 2, 0, 0);
        // weight ne = [3, 3, 2048, 1]
@@ -1449,8 +1449,8 @@ struct clip_graph {
        // glm projector
        else if (ctx->proj_type() == PROJECTOR_TYPE_GLM_EDGE) {
            size_t gridsz = (size_t)sqrt(embeddings->ne[1]);
-            embeddings = ggml_cont(ctx0, ggml_permute(ctx0,embeddings,1,0,2,3));
-            embeddings = ggml_reshape_3d(ctx0, embeddings, gridsz, gridsz, embeddings->ne[1]);
+            embeddings = ggml_permute(ctx0,embeddings,1,0,2,3);
+            embeddings = ggml_cont_3d(ctx0, embeddings, gridsz, gridsz, embeddings->ne[1]);
            embeddings = ggml_conv_2d(ctx0, model.mm_model_adapter_conv_w, embeddings, 2, 2, 0, 0, 1, 1);
            embeddings = ggml_reshape_3d(ctx0, embeddings,embeddings->ne[0]*embeddings->ne[1] , embeddings->ne[2], batch_size);
            embeddings = ggml_cont(ctx0, ggml_permute(ctx0,embeddings, 1, 0, 2, 3));
@@ -1750,7 +1750,9 @@ struct clip_graph {
            cur = ggml_cont(ctx0, cur);
            cur = ggml_pool_1d(ctx0, cur, GGML_OP_POOL_AVG, 2, 2, 0);
            cur = ggml_transpose(ctx0, cur);
-            cur = ggml_cont(ctx0, cur);
+            if (!model.post_ln_w) {
+                cur = ggml_cont(ctx0, cur);
+            }
            inpL = cur;
        }
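This hunk changes behavior conditionally rather than fusing ops: the copy after the transpose survives only on the path with no post layer norm. A plausible reading (an assumption, not stated in the commit) is that when model.post_ln_w is set, the norm that consumes cur accepts the non-contiguous transpose and emits contiguous output anyway, making the extra copy node redundant:

    // assumption: the post-LN, when present, absorbs the layout fix-up itself
    if (!model.post_ln_w) {
        cur = ggml_cont(ctx0, cur); // no norm follows, so materialize here
    }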

@@ -2005,7 +2007,6 @@ struct clip_graph {
            ggml_row_size(cur->type, n_dim),
            ggml_row_size(cur->type, n_dim*n_head),
            n_dim/2 * ggml_element_size(cur));
-        second = ggml_cont(ctx0, second); // copy, because ggml_rope don't play well with non-contiguous tensors
        second = ggml_rope_ext(
            ctx0,
            second,
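The deleted line here documented its own reason for existing ("copy, because ggml_rope don't play well with non-contiguous tensors"). Removing it implies ggml_rope_ext can now consume the strided half-width view of cur built just above, so the copy node was pure overhead; if rope ever regresses on non-contiguous input, this is the line to restore.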

0 commit comments