Skip to content

Commit 061a448

Browse files
committed
Implement pixel unshuffle block for lfm2vl
1 parent 955b18e commit 061a448

File tree

1 file changed

+39
-0
lines changed

1 file changed

+39
-0
lines changed

tools/mtmd/clip.cpp

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1986,6 +1986,45 @@ struct clip_graph {
19861986
return cur;
19871987
}
19881988

1989+
// lfm2vl
1990+
static ggml_tensor * build_pixel_unshuffle_block(ggml_context * ctx, ggml_tensor * x, const int factor) {
1991+
// [n, w, h, c]
1992+
const int64_t n = x->ne[3];
1993+
int64_t w = x->ne[2];
1994+
int64_t h = x->ne[1];
1995+
const int64_t c = x->ne[0];
1996+
1997+
if (factor == 1) {
1998+
return x;
1999+
}
2000+
2001+
GGML_ASSERT(factor > 1 && (factor & (factor - 1)) == 0); // factor must be power of two for GGML_PAD
2002+
GGML_ASSERT(n == 1); // only support batch size of 1
2003+
GGML_ASSERT(w > 0 && h > 0); // width and height must be positive
2004+
2005+
// pad w and h to factor
2006+
const int64_t pad_w = GGML_PAD(w, factor) - w;
2007+
const int64_t pad_h = GGML_PAD(h, factor) - h;
2008+
2009+
if (pad_w || pad_h) {
2010+
x = ggml_pad(ctx, x, 0, pad_h, pad_w, 0);
2011+
w += pad_w;
2012+
h += pad_h;
2013+
}
2014+
2015+
// unshuffle h
2016+
x = ggml_view_3d(ctx, x, c * factor, h / factor, w, x->nb[1] * factor, x->nb[2], 0);
2017+
x = ggml_permute(ctx, x, 0, 2, 1, 3);
2018+
x = ggml_cont(ctx, x);
2019+
2020+
// unshuffle w
2021+
x = ggml_view_3d(ctx, x, c * factor * factor, w / factor, h / factor, x->nb[1] * factor, x->nb[2], 0);
2022+
x = ggml_permute(ctx, x, 0, 2, 1, 3);
2023+
x = ggml_cont(ctx, x);
2024+
2025+
return x;
2026+
}
2027+
19892028
};
19902029

19912030
static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32_batch & imgs) {

0 commit comments

Comments
 (0)