@@ -1986,6 +1986,45 @@ struct clip_graph {
19861986 return cur;
19871987 }
19881988
1989+ // lfm2vl
1990+ static ggml_tensor * build_pixel_unshuffle_block (ggml_context * ctx, ggml_tensor * x, const int factor) {
1991+ // [n, w, h, c]
1992+ const int64_t n = x->ne [3 ];
1993+ int64_t w = x->ne [2 ];
1994+ int64_t h = x->ne [1 ];
1995+ const int64_t c = x->ne [0 ];
1996+
1997+ if (factor == 1 ) {
1998+ return x;
1999+ }
2000+
2001+ GGML_ASSERT (factor > 1 && (factor & (factor - 1 )) == 0 ); // factor must be power of two for GGML_PAD
2002+ GGML_ASSERT (n == 1 ); // only support batch size of 1
2003+ GGML_ASSERT (w > 0 && h > 0 ); // width and height must be positive
2004+
2005+ // pad w and h to factor
2006+ const int64_t pad_w = GGML_PAD (w, factor) - w;
2007+ const int64_t pad_h = GGML_PAD (h, factor) - h;
2008+
2009+ if (pad_w || pad_h) {
2010+ x = ggml_pad (ctx, x, 0 , pad_h, pad_w, 0 );
2011+ w += pad_w;
2012+ h += pad_h;
2013+ }
2014+
2015+ // unshuffle h
2016+ x = ggml_view_3d (ctx, x, c * factor, h / factor, w, x->nb [1 ] * factor, x->nb [2 ], 0 );
2017+ x = ggml_permute (ctx, x, 0 , 2 , 1 , 3 );
2018+ x = ggml_cont (ctx, x);
2019+
2020+ // unshuffle w
2021+ x = ggml_view_3d (ctx, x, c * factor * factor, w / factor, h / factor, x->nb [1 ] * factor, x->nb [2 ], 0 );
2022+ x = ggml_permute (ctx, x, 0 , 2 , 1 , 3 );
2023+ x = ggml_cont (ctx, x);
2024+
2025+ return x;
2026+ }
2027+
19892028};
19902029
19912030static ggml_cgraph * clip_image_build_graph (clip_ctx * ctx, const clip_image_f32_batch & imgs) {
0 commit comments