@@ -2230,7 +2230,14 @@ struct llava_uhd {
22302230 clip_image_size refined_size; // size of image right before slicing (must be multiple of slice size)
22312231 clip_image_size grid_size; // grid_size.width * grid_size.height = number of slices
22322232 std::vector<slice_coordinates> slices;
2233+
2234+ img_tool::resize_algo interpolation_overview = img_tool::RESIZE_ALGO_BILINEAR;
2235+ bool padding_overview = false ; // if true, the overview image will be padded when resized to overview_size
2236+ std::array<uint8_t , 3 > pad_color_overview = {0 , 0 , 0 };
2237+
2238+ img_tool::resize_algo interpolation_refined = img_tool::RESIZE_ALGO_BICUBIC;
22332239 bool padding_refined = false ; // if true, refine image will be padded to the grid size (e.g. llava-1.6)
2240+ std::array<uint8_t , 3 > pad_color_refined = {0 , 0 , 0 };
22342241 };
22352242
22362243 static slice_instructions get_slice_instructions (struct clip_ctx * ctx, const clip_image_size & original_size) {
@@ -2257,10 +2264,11 @@ struct llava_uhd {
22572264 auto refine_size = llava_uhd::select_best_resolution (
22582265 original_size,
22592266 ctx->model .hparams .image_res_candidates );
2260- res.overview_size = clip_image_size{slice_size, slice_size};
2261- res.refined_size = refine_size;
2262- res.grid_size = clip_image_size{0 , 0 };
2263- res.padding_refined = true ;
2267+ res.overview_size = clip_image_size{slice_size, slice_size};
2268+ res.refined_size = refine_size;
2269+ res.grid_size = clip_image_size{0 , 0 };
2270+ res.padding_refined = true ;
2271+ res.interpolation_refined = img_tool::RESIZE_ALGO_BILINEAR; // preserve old behavior when padding
22642272
22652273 LOG_DBG (" %s: using pinpoints for slicing\n " , __func__);
22662274 LOG_DBG (" %s: original size: %d x %d, overview size: %d x %d, refined size: %d x %d\n " ,
@@ -2339,26 +2347,22 @@ struct llava_uhd {
23392347
23402348 static std::vector<clip_image_u8_ptr> slice_image (const clip_image_u8 * img, const slice_instructions & inst) {
23412349 std::vector<clip_image_u8_ptr> output;
2342- img_tool::resize_algo interpolation = img_tool::RESIZE_ALGO_BILINEAR; // TODO: make it configurable
23432350
23442351 // resize to overview size
23452352 clip_image_u8_ptr resized_img (clip_image_u8_init ());
2346- img_tool::resize (*img, *resized_img, inst.overview_size , interpolation);
2353+ img_tool::resize (*img, *resized_img, inst.overview_size , inst.interpolation_overview ,
2354+ inst.padding_overview , inst.pad_color_overview );
23472355 output.push_back (std::move (resized_img));
2356+
23482357 if (inst.slices .empty ()) {
23492358 // no slices, just return the resized image
23502359 return output;
23512360 }
23522361
23532362 // resize to refined size
23542363 clip_image_u8_ptr refined_img (clip_image_u8_init ());
2355- if (inst.padding_refined ) {
2356- img_tool::resize (*img, *refined_img, inst.refined_size , interpolation);
2357- } else {
2358- // only algo bicubic preserves the ratio; old models rely on this behavior
2359- // TODO: do we need to support other algos here?
2360- img_tool::resize (*img, *refined_img, inst.refined_size , img_tool::RESIZE_ALGO_BICUBIC, false );
2361- }
2364+ img_tool::resize (*img, *refined_img, inst.refined_size , inst.interpolation_refined ,
2365+ inst.padding_refined , inst.pad_color_refined );
23622366
23632367 // create slices
23642368 for (const auto & slice : inst.slices ) {
0 commit comments