     // implementation of the 2D RoPE without adding a new op in ggml
-    // this is not efficient (uses double the memory), but works on all backends
-    // TODO: there was a more efficient version which relied on ggml_view and ggml_rope_ext_inplace, but rope inplace does not work well with non-contiguous tensors; we should fix that and revert back to the original implementation in https://github.com/ggml-org/llama.cpp/pull/13065
     static ggml_tensor * build_rope_2d(
+        ggml_cgraph * gf,
         ggml_context * ctx0,
         ggml_tensor * cur,
         ggml_tensor * pos_a, // first half
@@ -2002,16 +2001,10 @@ struct clip_graph {
             : 1.0;

         // first half
-        ggml_tensor * first;
         {
-            first = ggml_view_3d(ctx0, cur,
-                n_dim/2, n_head, n_pos,
-                ggml_row_size(cur->type, n_dim),
-                ggml_row_size(cur->type, n_dim*n_head),
-                0);
-            first = ggml_rope_ext(
+            cur = ggml_rope_ext(
                 ctx0,
-                first,
+                cur,
                 pos_a, // positions
                 nullptr, // freq factors
                 n_dim/2, // n_dims
@@ -2028,7 +2021,8 @@ struct clip_graph {
                 ggml_row_size(cur->type, n_dim),
                 ggml_row_size(cur->type, n_dim*n_head),
                 n_dim/2 * ggml_element_size(cur));
-            second = ggml_rope_ext(
+            // "second" tensor should be on the same backend as ggml_rope_ext(), therefore we can use inplace version