@@ -674,18 +674,16 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
674674
675675 // for img2img
676676 sd_image_t input_image = {0 ,0 ,0 ,nullptr };
677- std::vector<sd_image_t > extraimage_references;
678- extraimage_references.reserve (max_extra_images);
679- std::vector<std::vector<uint8_t >> extraimage_buffers;
680- extraimage_buffers.reserve (max_extra_images);
677+ std::vector<sd_image_t > kontext_imgs;
678+ std::vector<sd_image_t > wan_imgs;
679+ std::vector<sd_image_t > photomaker_imgs;
680+ bool is_wan = (loadedsdver == SDVersion::VERSION_WAN2 || loadedsdver == SDVersion::VERSION_WAN2_2_I2V || loadedsdver == SDVersion::VERSION_WAN2_2_TI2V);
681+ bool is_kontext = (loadedsdver==SDVersion::VERSION_FLUX && !loaded_model_is_chroma (sd_ctx));
681682
682683 int nx, ny, nc;
683684 int img2imgW = sd_params->width ; // for img2img input
684685 int img2imgH = sd_params->height ;
685686 int img2imgC = 3 ; // Assuming RGB image
686- // because the reference image can be larger than the output image, allocate at least enough for 1024x1024
687- const int imgMemNeed = std::max (img2imgW * img2imgH * img2imgC + 512 , 1024 * 1024 * img2imgC + 512 );
688- std::vector<std::vector<uint8_t >> resized_extraimage_bufs (max_extra_images, std::vector<uint8_t >(imgMemNeed));
689687
690688 std::string ts = get_timestamp_str ();
691689 if (!sd_is_quiet)
@@ -707,52 +705,45 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
707705 }
708706 input_extraimage_buffers.clear ();
709707 }
710- extraimage_buffers.clear ();
711- extraimage_references.clear ();
712708 for (int i=0 ;i<extra_image_data.size () && i<max_extra_images;++i)
713709 {
714710 int nx2, ny2, nc2;
715711 int desiredchannels = 3 ;
716- extraimage_buffers.push_back (kcpp_base64_decode (extra_image_data[i]));
717- input_extraimage_buffers.push_back (stbi_load_from_memory (extraimage_buffers[i].data (), extraimage_buffers[i].size (), &nx2, &ny2, &nc2, desiredchannels));
718- // Resize the image
719- float aspect_ratio = static_cast <float >(nx2) / ny2;
720- int desiredWidth = nx2;
721- int desiredHeight = ny2;
722- int smallestsrcdim = std::min (img2imgW,img2imgH);
723- if (desiredWidth > desiredHeight)
712+ if (is_wan)
724713 {
725- desiredWidth = smallestsrcdim;
726- desiredHeight = smallestsrcdim / aspect_ratio;
727- } else {
728- desiredHeight = smallestsrcdim;
729- desiredWidth = smallestsrcdim * aspect_ratio;
714+ uint8_t * loaded = load_image_from_b64 (extra_image_data[i],nx2,ny2,img2imgW,img2imgH,3 );
715+ if (loaded)
716+ {
717+ input_extraimage_buffers.push_back (loaded);
718+ sd_image_t extraimage_reference;
719+ extraimage_reference.width = nx2;
720+ extraimage_reference.height = ny2;
721+ extraimage_reference.channel = desiredchannels;
722+ extraimage_reference.data = loaded;
723+ wan_imgs.push_back (extraimage_reference);
724+ }
730725 }
731-
732- // round dims to 64
733- desiredWidth = roundnearest (16 ,desiredWidth);
734- desiredHeight = roundnearest (16 ,desiredHeight);
735- desiredWidth = std::clamp (desiredWidth,64 ,1024 );
736- desiredHeight = std::clamp (desiredHeight,64 ,1024 );
737-
738- if (!sd_is_quiet && sddebugmode==1 )
726+ else if (is_kontext || photomaker_enabled)
739727 {
740- printf (" Resize Extraimg: %dx%d to %dx%d\n " ,nx2,ny2,desiredWidth,desiredHeight);
741- }
742- int resok = stbir_resize_uint8 (input_extraimage_buffers[i], nx2, ny2, 0 , resized_extraimage_bufs[i].data (), desiredWidth, desiredHeight, 0 , desiredchannels);
743- if (!resok) {
744- printf (" \n KCPP SD: resize extra image failed!\n " );
745- output.data = " " ;
746- output.animated = 0 ;
747- output.status = 0 ;
748- return output;
728+ uint8_t * loaded = load_image_from_b64 (extra_image_data[i],nx2,ny2);
729+ if (loaded)
730+ {
731+ input_extraimage_buffers.push_back (loaded);
732+ sd_image_t extraimage_reference;
733+ extraimage_reference.width = nx2;
734+ extraimage_reference.height = ny2;
735+ extraimage_reference.channel = desiredchannels;
736+ extraimage_reference.data = loaded;
737+ if (is_kontext)
738+ {
739+ kontext_imgs.push_back (extraimage_reference);
740+ }
741+ else
742+ {
743+ photomaker_imgs.push_back (extraimage_reference);
744+ }
745+ }
749746 }
750- sd_image_t extraimage_reference;
751- extraimage_reference.width = desiredWidth;
752- extraimage_reference.height = desiredHeight;
753- extraimage_reference.channel = desiredchannels;
754- extraimage_reference.data = resized_extraimage_bufs[i].data ();
755- extraimage_references.push_back (extraimage_reference);
756747 }
757748
758749 // ensure prompt has img keyword, otherwise append it
@@ -765,48 +756,10 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
765756 sd_params->prompt = " person " + sd_params->prompt ;
766757 }
767758 }
768- }
769-
770- std::vector<sd_image_t > reference_imgs;
771- std::vector<sd_image_t > wan_imgs;
772- bool is_wan = (loadedsdver == SDVersion::VERSION_WAN2 || loadedsdver == SDVersion::VERSION_WAN2_2_I2V || loadedsdver == SDVersion::VERSION_WAN2_2_TI2V);
773- bool is_kontext = (loadedsdver==SDVersion::VERSION_FLUX && !loaded_model_is_chroma (sd_ctx));
774- if (extra_image_data.size ()>0 )
775- {
776- if (is_kontext)
777- {
778- for (int i=0 ;i<extra_image_data.size ();++i)
779- {
780- reference_imgs.push_back (extraimage_references[i]);
781- }
782- if (!sd_is_quiet && sddebugmode==1 )
783- {
784- printf (" \n Image Gen: Using %d reference images\n " ,reference_imgs.size ());
785- }
786- }
787- if (is_wan)
788- {
789- for (int i=0 ;i<extra_image_data.size ();++i)
790- {
791- wan_imgs.push_back (extraimage_references[i]);
792- }
793- if (!sd_is_quiet && sddebugmode==1 )
794- {
795- printf (" \n Image Gen: Using %d video reference images\n " ,wan_imgs.size ());
796- }
797- }
798- }
799759
800- std::vector<sd_image_t > photomaker_imgs;
801- if (photomaker_enabled && extra_image_data.size ()>0 )
802- {
803- for (int i=0 ;i<extra_image_data.size ();++i)
804- {
805- photomaker_imgs.push_back (extraimage_references[i]);
806- }
807760 if (!sd_is_quiet && sddebugmode==1 )
808761 {
809- printf (" \n Photomaker: Using %d reference images \n " ,photomaker_imgs.size ());
762+ printf (" \n ImageGen References: Kontext=%d Wan= %d Photomaker=%d \n " ,kontext_imgs. size (),wan_imgs. size () ,photomaker_imgs.size ());
810763 }
811764 }
812765
@@ -829,8 +782,8 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
829782 params.vae_tiling_params .enabled = dotile;
830783 params.batch_count = 1 ;
831784
832- params.ref_images = reference_imgs .data ();
833- params.ref_images_count = reference_imgs .size ();
785+ params.ref_images = kontext_imgs .data ();
786+ params.ref_images_count = kontext_imgs .size ();
834787 params.pm_params .id_images = photomaker_imgs.data ();
835788 params.pm_params .id_images_count = photomaker_imgs.size ();
836789
0 commit comments