Skip to content

Commit 333e2bb

Browse files
committed
Fix for Qwen Image crashing when reference images are too big; trial and error shows the crash happens once a reference image exceeds a total area of 512x512 pixels.
1 parent 7179e49 commit 333e2bb

File tree

2 files changed

+59
-36
lines changed

2 files changed

+59
-36
lines changed

koboldcpp.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@
6565
extra_images_max = 4 # for kontext/qwen img
6666

6767
# global vars
68-
KcppVersion = "1.101"
68+
KcppVersion = "1.101.1"
6969
showdebug = True
7070
kcpp_instance = None #global running instance
7171
global_memory = {"tunnel_url": "", "restart_target":"", "input_to_exit":False, "load_complete":False, "restart_override_config_target":""}

otherarch/sdcpp/sdtype_adapter.cpp

Lines changed: 58 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -568,33 +568,8 @@ static enum sample_method_t sampler_from_name(const std::string& sampler)
568568
}
569569
}
570570

571-
uint8_t* load_image_from_b64(const std::string & b64str, int& width, int& height, int expected_width = 0, int expected_height = 0, int expected_channel = 3)
571+
uint8_t* resize_image(uint8_t * image_buffer, int& width, int& height, int expected_width = 0, int expected_height = 0, int expected_channel = 3)
572572
{
573-
std::vector<uint8_t> decoded_buf = kcpp_base64_decode(b64str);
574-
int c = 0;
575-
uint8_t* image_buffer = (uint8_t*)stbi_load_from_memory(decoded_buf.data(), decoded_buf.size(), &width, &height, &c, expected_channel);
576-
577-
if (image_buffer == NULL) {
578-
fprintf(stderr, "load_image_from_b64 failed\n");
579-
return NULL;
580-
}
581-
if (c < expected_channel) {
582-
fprintf(stderr, "load_image_from_b64: the number of channels for the input image must be >= %d, but got %d channels\n", expected_channel, c);
583-
free(image_buffer);
584-
return NULL;
585-
}
586-
if (width <= 0) {
587-
fprintf(stderr, "load_image_from_b64 error: the width of image must be greater than 0\n");
588-
free(image_buffer);
589-
return NULL;
590-
}
591-
if (height <= 0) {
592-
fprintf(stderr, "load_image_from_b64 error: the height of image must be greater than 0\n");
593-
free(image_buffer);
594-
return NULL;
595-
}
596-
597-
// Resize input image ...
598573
if ((expected_width > 0 && expected_height > 0) && (height != expected_height || width != expected_width)) {
599574
float dst_aspect = (float)expected_width / (float)expected_height;
600575
float src_aspect = (float)width / (float)height;
@@ -658,7 +633,37 @@ uint8_t* load_image_from_b64(const std::string & b64str, int& width, int& height
658633
image_buffer = resized_image_buffer;
659634
}
660635
return image_buffer;
636+
}
637+
638+
uint8_t* load_image_from_b64(const std::string & b64str, int& width, int& height, int expected_width = 0, int expected_height = 0, int expected_channel = 3)
639+
{
640+
std::vector<uint8_t> decoded_buf = kcpp_base64_decode(b64str);
641+
int c = 0;
642+
uint8_t* image_buffer = (uint8_t*)stbi_load_from_memory(decoded_buf.data(), decoded_buf.size(), &width, &height, &c, expected_channel);
643+
644+
if (image_buffer == NULL) {
645+
fprintf(stderr, "load_image_from_b64 failed\n");
646+
return NULL;
647+
}
648+
if (c < expected_channel) {
649+
fprintf(stderr, "load_image_from_b64: the number of channels for the input image must be >= %d, but got %d channels\n", expected_channel, c);
650+
free(image_buffer);
651+
return NULL;
652+
}
653+
if (width <= 0) {
654+
fprintf(stderr, "load_image_from_b64 error: the width of image must be greater than 0\n");
655+
free(image_buffer);
656+
return NULL;
657+
}
658+
if (height <= 0) {
659+
fprintf(stderr, "load_image_from_b64 error: the height of image must be greater than 0\n");
660+
free(image_buffer);
661+
return NULL;
662+
}
661663

664+
// Resize input image ...
665+
image_buffer = resize_image(image_buffer,width,height,expected_width,expected_height,expected_channel);
666+
return image_buffer;
662667
}
663668

664669
static enum scheduler_t scheduler_from_name(const char * scheduler)
@@ -830,13 +835,32 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
830835
uint8_t * loaded = load_image_from_b64(extra_image_data[i],nx2,ny2);
831836
if(loaded)
832837
{
833-
input_extraimage_buffers.push_back(loaded);
834-
sd_image_t extraimage_reference;
835-
extraimage_reference.width = nx2;
836-
extraimage_reference.height = ny2;
837-
extraimage_reference.channel = desiredchannels;
838-
extraimage_reference.data = loaded;
839-
reference_imgs.push_back(extraimage_reference);
838+
//kcpp fix: qwen image can overflow the stack and crash when a ref image exceeds
839+
// a total area of 512x512 = 262144 pixels, so we downscale if that's the case
840+
int tgtx = nx2;
841+
int tgty = ny2;
842+
int res_lim_crash = 512 * 512;
843+
if (nx2 * ny2 > res_lim_crash)
844+
{
845+
float factor = sqrtf((float)res_lim_crash / ((float)nx2 * (float)ny2));
846+
tgtx = (int)(nx2 * factor);
847+
tgty = (int)(ny2 * factor);
848+
if (!sd_is_quiet && sddebugmode == 1)
849+
{
850+
printf("\nResized RefImg %dx%d to %dx%d", nx2, ny2, tgtx, tgty);
851+
}
852+
loaded = resize_image(loaded, nx2, ny2, tgtx, tgty);
853+
}
854+
if(loaded)
855+
{
856+
input_extraimage_buffers.push_back(loaded);
857+
sd_image_t extraimage_reference;
858+
extraimage_reference.width = nx2;
859+
extraimage_reference.height = ny2;
860+
extraimage_reference.channel = desiredchannels;
861+
extraimage_reference.data = loaded;
862+
reference_imgs.push_back(extraimage_reference);
863+
}
840864
}
841865
}
842866
else if (is_kontext || photomaker_enabled)
@@ -881,9 +905,8 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
881905

882906
sd_img_gen_params_t params = {};
883907
sd_img_gen_params_init (&params);
884-
885908
params.batch_count = 1;
886-
909+
params.auto_resize_ref_image = true;
887910
params.prompt = sd_params->prompt.c_str();
888911
params.negative_prompt = sd_params->negative_prompt.c_str();
889912
params.clip_skip = sd_params->clip_skip;

0 commit comments

Comments
 (0)