@@ -568,33 +568,8 @@ static enum sample_method_t sampler_from_name(const std::string& sampler)
568568 }
569569}
570570
571- uint8_t * load_image_from_b64 ( const std::string & b64str , int & width, int & height, int expected_width = 0 , int expected_height = 0 , int expected_channel = 3 )
571+ uint8_t * resize_image ( uint8_t * image_buffer , int & width, int & height, int expected_width = 0 , int expected_height = 0 , int expected_channel = 3 )
572572{
573- std::vector<uint8_t > decoded_buf = kcpp_base64_decode (b64str);
574- int c = 0 ;
575- uint8_t * image_buffer = (uint8_t *)stbi_load_from_memory (decoded_buf.data (), decoded_buf.size (), &width, &height, &c, expected_channel);
576-
577- if (image_buffer == NULL ) {
578- fprintf (stderr, " load_image_from_b64 failed\n " );
579- return NULL ;
580- }
581- if (c < expected_channel) {
582- fprintf (stderr, " load_image_from_b64: the number of channels for the input image must be >= %d, but got %d channels\n " , expected_channel, c);
583- free (image_buffer);
584- return NULL ;
585- }
586- if (width <= 0 ) {
587- fprintf (stderr, " load_image_from_b64 error: the width of image must be greater than 0\n " );
588- free (image_buffer);
589- return NULL ;
590- }
591- if (height <= 0 ) {
592- fprintf (stderr, " load_image_from_b64 error: the height of image must be greater than 0\n " );
593- free (image_buffer);
594- return NULL ;
595- }
596-
597- // Resize input image ...
598573 if ((expected_width > 0 && expected_height > 0 ) && (height != expected_height || width != expected_width)) {
599574 float dst_aspect = (float )expected_width / (float )expected_height;
600575 float src_aspect = (float )width / (float )height;
@@ -658,7 +633,37 @@ uint8_t* load_image_from_b64(const std::string & b64str, int& width, int& height
658633 image_buffer = resized_image_buffer;
659634 }
660635 return image_buffer;
636+ }
637+
// Decode a base64-encoded image into a raw pixel buffer, optionally resizing it.
//
// b64str           base64 text of the encoded image; decoded with kcpp_base64_decode.
// width, height    out-params: filled in by stbi_load_from_memory with the decoded
//                  image dimensions. They are also passed by reference to
//                  resize_image, so they may be changed again there
//                  (NOTE(review): confirm against resize_image's full body).
// expected_width,
// expected_height  target size forwarded to resize_image; both default to 0.
// expected_channel channel count requested from stbi_load_from_memory and the
//                  minimum channel count the source image must have (default 3).
//
// Returns the pixel buffer on success, or NULL if decoding fails, the source image
// has fewer than expected_channel channels, or a decoded dimension is not positive.
// Every failure path logs to stderr; the buffer is released with free() on the
// paths where it was already allocated.
// NOTE(review): presumably the caller owns the returned buffer and must free() it —
// verify against the call sites.
638+ uint8_t * load_image_from_b64 (const std::string & b64str, int & width, int & height, int expected_width = 0 , int expected_height = 0 , int expected_channel = 3 )
639+ {
640+ std::vector<uint8_t > decoded_buf = kcpp_base64_decode (b64str);
// c receives the channel count reported for the input image (it is the value
// printed as "got %d channels" below).
641+ int c = 0 ;
// NOTE(review): decoded_buf.size() is a size_t; confirm the length parameter type
// of stbi_load_from_memory and whether very large payloads need a guard.
642+ uint8_t * image_buffer = (uint8_t *)stbi_load_from_memory (decoded_buf.data (), decoded_buf.size (), &width, &height, &c, expected_channel);
643+
// Decode failure: nothing was allocated, so there is nothing to free.
644+ if (image_buffer == NULL ) {
645+ fprintf (stderr, " load_image_from_b64 failed\n " );
646+ return NULL ;
647+ }
// Reject inputs that carry fewer channels than requested.
648+ if (c < expected_channel) {
649+ fprintf (stderr, " load_image_from_b64: the number of channels for the input image must be >= %d, but got %d channels\n " , expected_channel, c);
650+ free (image_buffer);
651+ return NULL ;
652+ }
// Sanity checks on the decoded dimensions before handing the buffer on.
653+ if (width <= 0 ) {
654+ fprintf (stderr, " load_image_from_b64 error: the width of image must be greater than 0\n " );
655+ free (image_buffer);
656+ return NULL ;
657+ }
658+ if (height <= 0 ) {
659+ fprintf (stderr, " load_image_from_b64 error: the height of image must be greater than 0\n " );
660+ free (image_buffer);
661+ return NULL ;
662+ }
661663
// Delegate resizing to resize_image and return whatever buffer it hands back.
664+ // Resize input image ...
665+ image_buffer = resize_image (image_buffer,width,height,expected_width,expected_height,expected_channel);
666+ return image_buffer;
662667}
663668
664669static enum scheduler_t scheduler_from_name (const char * scheduler)
@@ -830,13 +835,32 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
830835 uint8_t * loaded = load_image_from_b64 (extra_image_data[i],nx2,ny2);
831836 if (loaded)
832837 {
833- input_extraimage_buffers.push_back (loaded);
834- sd_image_t extraimage_reference;
835- extraimage_reference.width = nx2;
836- extraimage_reference.height = ny2;
837- extraimage_reference.channel = desiredchannels;
838- extraimage_reference.data = loaded;
839- reference_imgs.push_back (extraimage_reference);
838+ // kcpp fix: qwen image can stack overflow and crash when ref images exceed
839+ // a total res of 512x512 = 262144, so we downscale if that's the case
840+ int tgtx = nx2;
841+ int tgty = ny2;
842+ int res_lim_crash = 512 * 512 ;
843+ if (nx2 * ny2 > res_lim_crash)
844+ {
845+ float factor = sqrtf ((float )res_lim_crash / ((float )nx2 * (float )ny2));
846+ tgtx = (int )(nx2 * factor);
847+ tgty = (int )(ny2 * factor);
848+ if (!sd_is_quiet && sddebugmode == 1 )
849+ {
850+ printf (" \n Resized RefImg %dx%d to %dx%d" , nx2, ny2, tgtx, tgty);
851+ }
852+ loaded = resize_image (loaded, nx2, ny2, tgtx, tgty);
853+ }
854+ if (loaded)
855+ {
856+ input_extraimage_buffers.push_back (loaded);
857+ sd_image_t extraimage_reference;
858+ extraimage_reference.width = nx2;
859+ extraimage_reference.height = ny2;
860+ extraimage_reference.channel = desiredchannels;
861+ extraimage_reference.data = loaded;
862+ reference_imgs.push_back (extraimage_reference);
863+ }
840864 }
841865 }
842866 else if (is_kontext || photomaker_enabled)
@@ -881,9 +905,8 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
881905
882906 sd_img_gen_params_t params = {};
883907 sd_img_gen_params_init (¶ms);
884-
885908 params.batch_count = 1 ;
886-
909+ params. auto_resize_ref_image = true ;
887910 params.prompt = sd_params->prompt .c_str ();
888911 params.negative_prompt = sd_params->negative_prompt .c_str ();
889912 params.clip_skip = sd_params->clip_skip ;
0 commit comments