@@ -572,13 +572,13 @@ struct clip_graph {
572572 } else if (ctx->proj_type () == PROJECTOR_TYPE_IDEFICS3) {
573573 // pixel_shuffle
574574 // https://github.com/huggingface/transformers/blob/0a950e0bbe1ed58d5401a6b547af19f15f0c195e/src/transformers/models/idefics3/modeling_idefics3.py#L578
575- const int scale_factor = model.hparams .get_scale_factor_per_side () ;
575+ const int scale_factor = model.hparams .proj_scale_factor ;
576576 cur = build_patch_merge_permute (cur, scale_factor);
577577 cur = ggml_mul_mat (ctx0, model.projection , cur);
578578
579579 } else if (ctx->proj_type () == PROJECTOR_TYPE_LFM2) {
580580 // pixel unshuffle block
581- const int scale_factor = model.hparams .get_scale_factor_per_side () ;
581+ const int scale_factor = model.hparams .proj_scale_factor ;
582582 cur = build_patch_merge_permute (cur, scale_factor);
583583
584584 // projection
@@ -3570,10 +3570,17 @@ struct img_tool {
35703570 static void composite (clip_image_u8 & dst, const clip_image_u8 & src, int offset_x, int offset_y) {
35713571 for (int y = 0 ; y < src.ny ; ++y) {
35723572 for (int x = 0 ; x < src.nx ; ++x) {
3573- for (int c = 0 ; c < 3 ; ++c) {
3574- dst.buf [3 * ((y + offset_y) * dst.nx + (x + offset_x)) + c] =
3575- src.buf [3 * (y * src.nx + x) + c];
3573+ int dx = x + offset_x;
3574+ int dy = y + offset_y;
3575+ // skip pixels that would be out of bounds in the destination
3576+ if (dx < 0 || dy < 0 || dx >= dst.nx || dy >= dst.ny ) {
3577+ continue ;
35763578 }
3579+ size_t dst_idx = 3 * (static_cast <size_t >(dy) * dst.nx + static_cast <size_t >(dx));
3580+ size_t src_idx = 3 * (static_cast <size_t >(y) * src.nx + static_cast <size_t >(x));
3581+ dst.buf [dst_idx + 0 ] = src.buf [src_idx + 0 ];
3582+ dst.buf [dst_idx + 1 ] = src.buf [src_idx + 1 ];
3583+ dst.buf [dst_idx + 2 ] = src.buf [src_idx + 2 ];
35773584 }
35783585 }
35793586 }
0 commit comments