Skip to content

Commit 4274417

Browse files
committed
fix (2)
1 parent 2892e0f commit 4274417

File tree

1 file changed

+11
-15
lines changed

1 file changed

+11
-15
lines changed

tools/mtmd/clip.cpp

Lines changed: 11 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -3546,12 +3546,12 @@ struct img_tool {
35463546
const int width = inp_size.width;
35473547
const int height = inp_size.height;
35483548

3549-
auto round_by_factor = [f = align_size](float x) { return static_cast<int>(std::nearbyintf(x / static_cast<float>(f))) * f; };
35503549
auto ceil_by_factor = [f = align_size](float x) { return static_cast<int>(std::ceil(x / static_cast<float>(f))) * f; };
35513550
auto floor_by_factor = [f = align_size](float x) { return static_cast<int>(std::floor(x / static_cast<float>(f))) * f; };
35523551

3553-
int h_bar = std::max(align_size, round_by_factor(height));
3554-
int w_bar = std::max(align_size, round_by_factor(width));
3552+
// always align up first
3553+
int h_bar = std::max(align_size, ceil_by_factor(height));
3554+
int w_bar = std::max(align_size, ceil_by_factor(width));
35553555

35563556
if (h_bar * w_bar > max_pixels) {
35573557
const auto beta = std::sqrt(static_cast<float>(height * width) / max_pixels);
@@ -4030,7 +4030,7 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, str
40304030
case PROJECTOR_TYPE_QWEN25VL:
40314031
case PROJECTOR_TYPE_QWEN3VL:
40324032
{
4033-
// step 1: make a blank canvas which aligns with grid
4033+
// step 1: make a blank canvas which aligns to the grid
40344034
clip_image_u8 canvas;
40354035
const clip_image_size canvas_size = img_tool::calc_size_preserved_ratio(
40364036
original_size,
@@ -4042,22 +4042,18 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, str
40424042
canvas.buf.resize(3 * canvas.nx * canvas.ny);
40434043
img_tool::fill(canvas, {0, 0, 0});
40444044

4045-
// step 2: resize original image to fit into the canvas
4046-
const clip_image_size scaled_size = img_tool::calc_size_preserved_ratio(
4047-
original_size,
4048-
1, // avoid distorting which causes bbox misalignment
4049-
params.image_min_pixels,
4050-
params.image_max_pixels);
4051-
4052-
if (scaled_size.height != original_size.height ||
4053-
scaled_size.width != original_size.width) {
4045+
// step 2: composite resized image onto the canvas, top-left corner
4046+
if (original_size.height > canvas.ny || original_size.width > canvas.nx) {
4047+
// need to resize original image first
40544048
clip_image_u8 resized;
4049+
const clip_image_size scaled_size = img_tool::calc_size_preserved_ratio(
4050+
original_size,
4051+
1, // no need to align here since we will composite onto canvas
4052+
std::min(canvas.nx, canvas.ny)); // fit into the canvas
40554053
img_tool::resize(*img, resized, scaled_size, img_tool::RESIZE_ALGO_BILINEAR);
4056-
// step 3: composite resized image onto the canvas, top-left corner
40574054
img_tool::composite(canvas, resized, 0, 0);
40584055
} else {
40594056
// no resizing needed
4060-
// step 3: composite original image onto the canvas, top-left corner
40614057
img_tool::composite(canvas, *img, 0, 0);
40624058
}
40634059

0 commit comments

Comments
 (0)