Skip to content

Commit 530ef06

Browse files
committed
mtmd: more optimized build_rope_2d
1 parent 7f76692 commit 530ef06

File tree

1 file changed

+9
-15
lines changed

1 file changed

+9
-15
lines changed

tools/mtmd/clip.cpp

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -571,7 +571,7 @@ struct clip_graph {
571571
ggml_set_input(pos_w);
572572

573573
auto add_pos = [&](ggml_tensor * cur, const clip_layer &) {
574-
return build_rope_2d(ctx0, cur, pos_h, pos_w, hparams.rope_theta, true);
574+
return build_rope_2d(gf, ctx0, cur, pos_h, pos_w, hparams.rope_theta, true);
575575
};
576576

577577
ggml_tensor * inp = build_inp();
@@ -1013,7 +1013,7 @@ struct clip_graph {
10131013
// first half is X axis and second half is Y axis
10141014
// ref: https://github.com/huggingface/transformers/blob/40a493c7ed4f19f08eadb0639cf26d49bfa5e180/src/transformers/models/llama4/modeling_llama4.py#L1312
10151015
// ref: https://github.com/Blaizzy/mlx-vlm/blob/a57156aa87b33cca6e5ee6cfc14dd4ef8f611be6/mlx_vlm/models/llama4/vision.py#L441
1016-
return build_rope_2d(ctx0, cur, pos_w, pos_h, hparams.rope_theta, false);
1016+
return build_rope_2d(gf, ctx0, cur, pos_w, pos_h, hparams.rope_theta, false);
10171017
};
10181018
ggml_tensor * cur = build_vit(
10191019
inp, n_pos,
@@ -1088,7 +1088,7 @@ struct clip_graph {
10881088
// build ViT with 2D position embeddings
10891089
auto add_pos = [&](ggml_tensor * cur, const clip_layer &) {
10901090
// first half is X axis and second half is Y axis
1091-
return build_rope_2d(ctx0, cur, pos_w, pos_h, hparams.rope_theta, false);
1091+
return build_rope_2d(gf, ctx0, cur, pos_w, pos_h, hparams.rope_theta, false);
10921092
};
10931093

10941094
ggml_tensor * inp = build_inp();
@@ -1975,9 +1975,8 @@ struct clip_graph {
19751975
}
19761976

19771977
// implementation of the 2D RoPE without adding a new op in ggml
1978-
// this is not efficient (use double the memory), but works on all backends
1979-
// TODO: there was a more efficient implementation which relies on ggml_view and ggml_rope_ext_inplace, but the rope inplace does not work well with non-contiguous tensors ; we should fix that and revert back to the original implementation in https://github.com/ggml-org/llama.cpp/pull/13065
19801978
static ggml_tensor * build_rope_2d(
1979+
ggml_cgraph * gf,
19811980
ggml_context * ctx0,
19821981
ggml_tensor * cur,
19831982
ggml_tensor * pos_a, // first half
@@ -2002,16 +2001,10 @@ struct clip_graph {
20022001
: 1.0;
20032002

20042003
// first half
2005-
ggml_tensor * first;
20062004
{
2007-
first = ggml_view_3d(ctx0, cur,
2008-
n_dim/2, n_head, n_pos,
2009-
ggml_row_size(cur->type, n_dim),
2010-
ggml_row_size(cur->type, n_dim*n_head),
2011-
0);
2012-
first = ggml_rope_ext(
2005+
cur = ggml_rope_ext(
20132006
ctx0,
2014-
first,
2007+
cur,
20152008
pos_a, // positions
20162009
nullptr, // freq factors
20172010
n_dim/2, // n_dims
@@ -2028,7 +2021,8 @@ struct clip_graph {
20282021
ggml_row_size(cur->type, n_dim),
20292022
ggml_row_size(cur->type, n_dim*n_head),
20302023
n_dim/2 * ggml_element_size(cur));
2031-
second = ggml_rope_ext(
2024+
// "second" tensor should be on the same backend as ggml_rope_ext(), therefore we can use the inplace version
2025+
second = ggml_rope_ext_inplace(
20322026
ctx0,
20332027
second,
20342028
pos_b, // positions
@@ -2038,9 +2032,9 @@ struct clip_graph {
20382032
freq_scale_odd,
20392033
0.0f, 1.0f, 0.0f, 0.0f
20402034
);
2035+
ggml_build_forward_expand(gf, second);
20412036
}
20422037

2043-
cur = ggml_concat(ctx0, first, second, 0);
20442038
return cur;
20452039
}
20462040

0 commit comments

Comments
 (0)