@@ -80,7 +80,8 @@ struct SDParams {
8080 std::string control_image_path;
8181 std::vector<std::string> ref_image_paths;
8282 std::string control_video_path;
83- bool increase_ref_index = false ;
83+ bool auto_resize_ref_image = true ;
84+ bool increase_ref_index = false ;
8485
8586 std::string prompt;
8687 std::string negative_prompt;
@@ -131,6 +132,7 @@ struct SDParams {
131132 prediction_t prediction = DEFAULT_PRED;
132133
133134 sd_tiling_params_t vae_tiling_params = {false , 0 , 0 , 0 .5f , 0 .0f , 0 .0f };
135+ bool force_sdxl_vae_conv_scale = false ;
134136
135137 SDParams () {
136138 sd_sample_params_init (&sample_params);
@@ -174,6 +176,7 @@ void print_params(SDParams params) {
174176 printf (" %s\n " , path.c_str ());
175177 };
176178 printf (" control_video_path: %s\n " , params.control_video_path .c_str ());
179+ printf (" auto_resize_ref_image: %s\n " , params.auto_resize_ref_image ? " true" : " false" );
177180 printf (" increase_ref_index: %s\n " , params.increase_ref_index ? " true" : " false" );
178181 printf (" offload_params_to_cpu: %s\n " , params.offload_params_to_cpu ? " true" : " false" );
179182 printf (" clip_on_cpu: %s\n " , params.clip_on_cpu ? " true" : " false" );
@@ -198,6 +201,7 @@ void print_params(SDParams params) {
198201 printf (" seed: %zd\n " , params.seed );
199202 printf (" batch_count: %d\n " , params.batch_count );
200203 printf (" vae_tiling: %s\n " , params.vae_tiling_params .enabled ? " true" : " false" );
204+ printf (" force_sdxl_vae_conv_scale: %s\n " , params.force_sdxl_vae_conv_scale ? " true" : " false" );
201205 printf (" upscale_repeats: %d\n " , params.upscale_repeats );
202206 printf (" chroma_use_dit_mask: %s\n " , params.chroma_use_dit_mask ? " true" : " false" );
203207 printf (" chroma_use_t5_mask: %s\n " , params.chroma_use_t5_mask ? " true" : " false" );
@@ -242,9 +246,10 @@ void print_usage(int argc, const char* argv[]) {
242246 printf (" -i, --end-img [IMAGE] path to the end image, required by flf2v\n " );
243247 printf (" --control-image [IMAGE] path to image condition, control net\n " );
244248 printf (" -r, --ref-image [PATH] reference image for Flux Kontext models (can be used multiple times) \n " );
249+ printf (" --disable-auto-resize-ref-image disable auto resize of ref images\n " );
245250 printf (" --control-video [PATH] path to control video frames, It must be a directory path.\n " );
246251 printf (" The video frames inside should be stored as images in lexicographical (character) order\n " );
247- printf (" For example, if the control video path is `frames`, the directory contain images such as 00.png, 01.png, … etc.\n " );
252+ printf (" For example, if the control video path is `frames`, the directory contain images such as 00.png, 01.png, ... etc.\n " );
248253 printf (" --increase-ref-index automatically increase the indices of references images based on the order they are listed (starting with 1).\n " );
249254 printf (" -o, --output OUTPUT path to write result image to (default: ./output.png)\n " );
250255 printf (" -p, --prompt [PROMPT] the prompt to render\n " );
@@ -292,6 +297,7 @@ void print_usage(int argc, const char* argv[]) {
292297 printf (" --vae-tile-size [X]x[Y] tile size for vae tiling (default: 32x32)\n " );
293298 printf (" --vae-relative-tile-size [X]x[Y] relative tile size for vae tiling, in fraction of image size if < 1, in number of tiles per dim if >=1 (overrides --vae-tile-size)\n " );
294299 printf (" --vae-tile-overlap OVERLAP tile overlap for vae tiling, in fraction of tile size (default: 0.5)\n " );
300+ printf (" --force-sdxl-vae-conv-scale force use of conv scale on sdxl vae\n " );
295301 printf (" --vae-on-cpu keep vae in cpu (for low vram)\n " );
296302 printf (" --clip-on-cpu keep clip in cpu (for low vram)\n " );
297303 printf (" --diffusion-fa use flash attention in the diffusion model (for low vram)\n " );
@@ -562,6 +568,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
562568
563569 options.bool_options = {
564570 {" " , " --vae-tiling" , " " , true , &params.vae_tiling_params .enabled },
571+ {" " , " --force-sdxl-vae-conv-scale" , " " , true , &params.force_sdxl_vae_conv_scale },
565572 {" " , " --offload-to-cpu" , " " , true , &params.offload_params_to_cpu },
566573 {" " , " --control-net-cpu" , " " , true , &params.control_net_cpu },
567574 {" " , " --clip-on-cpu" , " " , true , &params.clip_on_cpu },
@@ -575,6 +582,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
575582 {" " , " --chroma-disable-dit-mask" , " " , false , &params.chroma_use_dit_mask },
576583 {" " , " --chroma-enable-t5-mask" , " " , true , &params.chroma_use_t5_mask },
577584 {" " , " --increase-ref-index" , " " , true , &params.increase_ref_index },
585+ {" " , " --disable-auto-resize-ref-image" , " " , false , &params.auto_resize_ref_image },
578586 };
579587
580588 auto on_mode_arg = [&](int argc, const char ** argv, int index) {
@@ -1382,6 +1390,7 @@ int main(int argc, const char* argv[]) {
13821390 params.diffusion_flash_attn ,
13831391 params.diffusion_conv_direct ,
13841392 params.vae_conv_direct ,
1393+ params.force_sdxl_vae_conv_scale ,
13851394 params.chroma_use_dit_mask ,
13861395 params.chroma_use_t5_mask ,
13871396 params.chroma_t5_mask_pad ,
@@ -1423,6 +1432,7 @@ int main(int argc, const char* argv[]) {
14231432 init_image,
14241433 ref_images.data (),
14251434 (int )ref_images.size (),
1435+ params.auto_resize_ref_image ,
14261436 params.increase_ref_index ,
14271437 mask_image,
14281438 params.width ,
0 commit comments