4040static_assert ((int )SD_TYPE_COUNT == (int )GGML_TYPE_COUNT,
4141 " inconsistency between SD_TYPE_COUNT and GGML_TYPE_COUNT" );
4242
43- enum SDMode {
44- TXT2IMG,
45- IMG2IMG,
46- IMG2VID,
47- CONVERT,
48- MODE_COUNT
49- };
50-
5143struct SDParams {
5244 int n_threads = -1 ;
53- SDMode mode = TXT2IMG;
5445 std::string model_path;
5546 std::string clip_l_path;
5647 std::string clip_g_path;
5748 std::string t5xxl_path;
5849 std::string diffusion_model_path;
5950 std::string vae_path;
6051 std::string taesd_path;
61- std::string esrgan_path;
62- std::string controlnet_path;
63- std::string embeddings_path;
6452 std::string stacked_id_embeddings_path;
65- std::string input_id_images_path = " " ;
6653 sd_type_t wtype = SD_TYPE_COUNT;
67- std::string lora_model_dir;
68- std::string output_path = " output.png" ;
69- std::string input_path;
70- std::string mask_path;
71- std::string control_image_path;
7254
7355 std::string prompt;
7456 std::string negative_prompt;
75- float min_cfg = 1 .0f ;
7657 float cfg_scale = 7 .0f ;
77- float guidance = 3 .5f ;
78- float eta = 0 .f;
79- float style_ratio = 20 .f;
8058 int clip_skip = -1 ; // <= 0 represents unspecified
8159 int width = 512 ;
8260 int height = 512 ;
83- int batch_count = 1 ;
84-
85- int video_frames = 6 ;
86- int motion_bucket_id = 127 ;
87- int fps = 6 ;
88- float augmentation_level = 0 .f;
8961
9062 sample_method_t sample_method = EULER_A;
91- schedule_t schedule = DEFAULT;
9263 int sample_steps = 20 ;
9364 float strength = 0 .75f ;
94- float control_strength = 0 .9f ;
95- rng_type_t rng_type = CUDA_RNG;
9665 int64_t seed = 42 ;
97- bool verbose = false ;
98- bool vae_tiling = false ;
99- bool control_net_cpu = false ;
100- bool normalize_input = false ;
10166 bool clip_on_cpu = false ;
10267 bool vae_on_cpu = false ;
10368 bool diffusion_flash_attn = false ;
10469 bool diffusion_conv_direct = false ;
10570 bool vae_conv_direct = false ;
106- bool canny_preprocess = false ;
107- bool color = false ;
108- int upscale_repeats = 1 ;
109-
110- std::vector<int > skip_layers = {7 , 8 , 9 };
111- float slg_scale = 0 .f;
112- float skip_layer_start = 0 .01f ;
113- float skip_layer_end = 0 .2f ;
11471
11572 bool chroma_use_dit_mask = true ;
116- bool chroma_use_t5_mask = false ;
117- int chroma_t5_mask_pad = 1 ;
11873};
11974
12075// shared
@@ -264,8 +219,6 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
264219 sd_params->diffusion_flash_attn = inputs.flash_attention ;
265220 sd_params->diffusion_conv_direct = inputs.diffusion_conv_direct ;
266221 sd_params->vae_conv_direct = inputs.vae_conv_direct ;
267- sd_params->input_path = " " ; // unused
268- sd_params->batch_count = 1 ;
269222 sd_params->vae_path = vaefilename;
270223 sd_params->taesd_path = taesdpath;
271224 sd_params->t5xxl_path = t5xxl_filename;
@@ -289,62 +242,53 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
289242
290243 set_sd_log_level (sddebugmode);
291244
292- bool vae_decode_only = false ;
293- bool free_param = false ;
294- if (inputs.debugmode ==1 )
295- {
296- printf (" \n MODEL:%s\n VAE:%s\n TAESD:%s\n CNET:%s\n LORA:%s\n EMBD:%s\n VAE_DEC:%d\n VAE_TILE:%d\n FREE_PARAM:%d\n THREADS:%d\n WTYPE:%d\n RNGTYPE:%d\n SCHED:%d\n CNETCPU:%d\n\n " ,
297- sd_params->model_path .c_str (),
298- sd_params->vae_path .c_str (),
299- sd_params->taesd_path .c_str (),
300- sd_params->controlnet_path .c_str (),
301- sd_params->lora_model_dir .c_str (),
302- sd_params->embeddings_path .c_str (),
303- vae_decode_only,
304- sd_params->vae_tiling ,
305- free_param,
306- sd_params->n_threads ,
307- sd_params->wtype ,
308- sd_params->rng_type ,
309- sd_params->schedule ,
310- sd_params->control_net_cpu );
311- }
312-
313- sd_ctx_params_t params;
245+ sd_ctx_params_t params = {};
314246 sd_ctx_params_init (¶ms);
247+
315248 params.model_path = sd_params->model_path .c_str ();
316249 params.clip_l_path = sd_params->clip_l_path .c_str ();
317250 params.clip_g_path = sd_params->clip_g_path .c_str ();
318251 params.t5xxl_path = sd_params->t5xxl_path .c_str ();
319252 params.diffusion_model_path = sd_params->diffusion_model_path .c_str ();
320253 params.vae_path = sd_params->vae_path .c_str ();
321254 params.taesd_path = sd_params->taesd_path .c_str ();
322- params.control_net_path = sd_params->controlnet_path .c_str ();
323- params.lora_model_dir = sd_params->lora_model_dir .c_str ();
324- params.embedding_dir = sd_params->embeddings_path .c_str ();
325255 params.stacked_id_embed_dir = sd_params->stacked_id_embeddings_path .c_str ();
326- params.vae_decode_only = vae_decode_only;
327- params.vae_tiling = sd_params->vae_tiling ;
328- params.free_params_immediately = free_param;
256+
257+ params.vae_decode_only = false ;
258+ params.vae_tiling = false ;
259+ params.free_params_immediately = false ;
260+ params.rng_type = CUDA_RNG;
261+
329262 params.n_threads = sd_params->n_threads ;
330263 params.wtype = sd_params->wtype ;
331- params.rng_type = sd_params->rng_type ;
332- params.schedule = sd_params->schedule ;
333264 params.keep_clip_on_cpu = sd_params->clip_on_cpu ;
334- params.keep_control_net_on_cpu = sd_params->control_net_cpu ;
335- params.keep_vae_on_cpu = sd_params->vae_on_cpu ;
336265 params.diffusion_flash_attn = sd_params->diffusion_flash_attn ;
337266 params.diffusion_conv_direct = sd_params->diffusion_conv_direct ;
338267 params.vae_conv_direct = sd_params->vae_conv_direct ;
339268 params.chroma_use_dit_mask = sd_params->chroma_use_dit_mask ;
340- params.chroma_use_t5_mask = sd_params->chroma_use_t5_mask ;
341- params.chroma_t5_mask_pad = sd_params->chroma_t5_mask_pad ;
342269
343270 if (params.chroma_use_dit_mask && params.diffusion_flash_attn ) {
344271 // note we don't know yet if it's a Chroma model
345272 params.chroma_use_dit_mask = false ;
346273 }
347274
275+ if (inputs.debugmode ==1 )
276+ {
277+ std::stringstream ss;
278+ ss << " \n MODEL:" << params.model_path
279+ << " \n DIFFUSION:" << params.diffusion_model_path
280+ << " \n VAE:" << params.vae_path
281+ << " \n TAESD:" << params.taesd_path
282+ << " \n PHOTOMAKER:" << params.stacked_id_embed_dir
283+ << " \n THREADS:" << params.n_threads
284+ << " \n WTYPE:" << params.wtype
285+ << " \n DIFFUSIONFLASHATTN:" << (params.diffusion_flash_attn ? 1 : 0 )
286+ << " \n DIFFUSIONCONVDIRECT:" << (params.diffusion_conv_direct ? 1 : 0 )
287+ << " \n VAECONVDIRECT:" << (params.vae_conv_direct ? 1 : 0 )
288+ << " \n " ;
289+ printf (" %s" , ss.str ().c_str ());
290+ }
291+
348292 sd_ctx = new_sd_ctx (¶ms);
349293
350294 if (sd_ctx == NULL ) {
@@ -551,7 +495,6 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
551495 return output;
552496 }
553497 sd_image_t * results;
554- sd_image_t * control_image = NULL ;
555498
556499 // sanitize prompts, remove quotes and limit lengths
557500 std::string cleanprompt = clean_input_prompt (inputs.prompt );
@@ -574,7 +517,8 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
574517 sd_params->strength = inputs.denoising_strength ;
575518 sd_params->clip_skip = inputs.clip_skip ;
576519 sd_params->sample_method = sampler_from_name (inputs.sample_method );
577- sd_params->mode = (img2img_data==" " ?SDMode::TXT2IMG:SDMode::IMG2IMG);
520+
521+ bool is_img2img = img2img_data != " " ;
578522
579523 auto loadedsdver = get_loaded_sd_version (sd_ctx);
580524 if (loadedsdver == SDVersion::VERSION_FLUX)
@@ -589,7 +533,7 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
589533 if (sd_params->sample_method == sample_method_t ::EULER_A) {
590534 // euler a broken on flux
591535 if (!sd_is_quiet && sddebugmode) {
592- printf (" Flux : switching Euler A to Euler\n " );
536+ printf (" %s : switching Euler A to Euler\n " , loaded_model_is_chroma (sd_ctx) ? " Chroma " : " Flux " );
593537 }
594538 sd_params->sample_method = sample_method_t ::EULER;
595539 }
@@ -619,7 +563,8 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
619563
620564 sd_fix_resolution (sd_params->width , sd_params->height , img_hard_limit, img_soft_limit);
621565 if (inputs.width != sd_params->width || inputs.height != sd_params->height ) {
622- printf (" \n KCPP SD: Requested dimensions %dx%d changed to %dx%d\n " , inputs.width , inputs.height , sd_params->width , sd_params->height );
566+ printf (" \n KCPP SD: Requested dimensions %dx%d changed to %dx%d\n " ,
567+ inputs.width , inputs.height , sd_params->width , sd_params->height );
623568 }
624569
625570 // trigger tiling by image area, the memory used for the VAE buffer is 6656 bytes per image pixel, default 768x768
@@ -750,33 +695,22 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
750695 }
751696 }
752697
753- sd_img_gen_params_t params;
698+ sd_img_gen_params_t params = {} ;
754699 sd_img_gen_params_init (¶ms);
755700
756701 params.prompt = sd_params->prompt .c_str ();
757702 params.negative_prompt = sd_params->negative_prompt .c_str ();
758703 params.clip_skip = sd_params->clip_skip ;
759704 params.guidance .txt_cfg = sd_params->cfg_scale ;
760705 params.guidance .img_cfg = sd_params->cfg_scale ;
761- params.guidance .distilled_guidance = sd_params->guidance ;
762- params.eta = sd_params->eta ;
763706 params.width = sd_params->width ;
764707 params.height = sd_params->height ;
765708 params.sample_method = sd_params->sample_method ;
766709 params.sample_steps = sd_params->sample_steps ;
767710 params.seed = sd_params->seed ;
768- params.batch_count = sd_params->batch_count ;
769- params.control_cond = control_image;
770- params.control_strength = sd_params->control_strength ;
771- params.style_strength = sd_params->style_ratio ;
772- params.normalize_input = sd_params->normalize_input ;
773- params.input_id_images_path = sd_params->input_id_images_path .c_str ();
774-
775- params.guidance .slg .layers = sd_params->skip_layers .data ();
776- params.guidance .slg .layer_count = sd_params->skip_layers .size ();
777- params.guidance .slg .layer_start = sd_params->skip_layer_start ;
778- params.guidance .slg .layer_end = sd_params->skip_layer_end ;
779- params.guidance .slg .scale = sd_params->slg_scale ;
711+ params.strength = sd_params->strength ;
712+ params.batch_count = 1 ;
713+ params.input_id_images_path = " " ;
780714
781715 params.ref_images = reference_imgs.data ();
782716 params.ref_images_count = reference_imgs.size ();
@@ -785,30 +719,31 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
785719 extra_params.photomaker_references = photomaker_imgs.data ();
786720 extra_params.photomaker_reference_count = photomaker_imgs.size ();
787721
788- if (sd_params-> mode == TXT2IMG ) {
722+ if (!is_img2img ) {
789723
790724 if (!sd_is_quiet && sddebugmode==1 )
791725 {
792- printf (" \n TXT2IMG PROMPT:%s\n NPROMPT:%s\n CLPSKP:%d\n CFGSCLE:%f\n W:%d\n H:%d\n SM:%d\n STEP:%d\n SEED:%d\n BATCH:%d\n CIMG:%p\n CSTR:%f\n\n " ,
793- sd_params->prompt .c_str (),
794- sd_params->negative_prompt .c_str (),
795- sd_params->clip_skip ,
796- sd_params->cfg_scale ,
797- sd_params->width ,
798- sd_params->height ,
799- sd_params->sample_method ,
800- sd_params->sample_steps ,
801- (int )sd_params->seed ,
802- sd_params->batch_count ,
803- control_image,
804- sd_params->control_strength );
726+ std::stringstream ss;
727+ ss << " \n TXT2IMG PROMPT:" << params.prompt
728+ << " \n NPROMPT:" << params.negative_prompt
729+ << " \n CLPSKP:" << params.clip_skip
730+ << " \n CFGSCLE:" << params.guidance .txt_cfg
731+ << " \n SIZE:" << params.width << " x" << params.height
732+ << " \n SM:" << sd_sample_method_name (params.sample_method )
733+ << " \n STEP:" << params.sample_steps
734+ << " \n SEED:" << params.seed
735+ << " \n BATCH:" << params.batch_count
736+ << " \n\n " ;
737+ printf (" %s" , ss.str ().c_str ());
805738 }
806739
740+ fflush (stdout);
741+
807742 results = generate_image (sd_ctx, ¶ms, &extra_params);
808743
809744 } else {
810745
811- if (sd_params-> width <= 0 || sd_params-> width % 64 != 0 || sd_params-> height <= 0 || sd_params-> height % 64 != 0 ) {
746+ if (params. width <= 0 || params. width % 64 != 0 || params. height <= 0 || params. height % 64 != 0 ) {
812747 printf (" \n KCPP SD: bad request image dimensions!\n " );
813748 output.data = " " ;
814749 output.status = 0 ;
@@ -894,26 +829,27 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
894829 }
895830 sd_image_t mask_image = { (uint32_t ) img2imgW, (uint32_t ) img2imgH, 1 , mask_image_buffer };
896831
832+ params.init_image = input_image;
833+ params.mask_image = mask_image;
834+
897835 if (!sd_is_quiet && sddebugmode==1 )
898836 {
899- printf ( " \n IMG2IMG PROMPT:%s \n NPROMPT:%s \n CLPSKP:%d \n CFGSCLE:%f \n W:%d \n H:%d \n SM:%d \n STEP:%d \n SEED:%d \n BATCH:%d \n CIMG:%p \n STR:%f \n\n " ,
900- sd_params-> prompt . c_str (),
901- sd_params-> negative_prompt . c_str (),
902- sd_params-> clip_skip ,
903- sd_params-> cfg_scale ,
904- sd_params-> width ,
905- sd_params-> height ,
906- sd_params-> sample_method ,
907- sd_params-> sample_steps ,
908- ( int )sd_params-> seed ,
909- sd_params-> batch_count ,
910- control_image,
911- sd_params-> strength );
837+ std::stringstream ss;
838+ ss << " \n nIMG2IMG PROMPT: " << params. prompt
839+ << " \n NPROMPT: " << params. negative_prompt
840+ << " \n CLPSKP: " << params. clip_skip
841+ << " \n CFGSCLE: " << params. guidance . txt_cfg
842+ << " \n SIZE: " << params. width << " x " << params. height
843+ << " \n SM: " << sd_sample_method_name (params. sample_method )
844+ << " \n STEP: " << params. sample_steps
845+ << " \n SEED: " << params. seed
846+ << " \n STRENGTH: " << params. strength
847+ << " \n BATCH: " << params. batch_count
848+ << " \n\n " ;
849+ printf ( " %s " , ss. str (). c_str () );
912850 }
913851
914- params.strength = sd_params->strength ;
915- params.init_image = input_image;
916- params.mask_image = mask_image;
852+ fflush (stdout);
917853
918854 results = generate_image (sd_ctx, ¶ms, &extra_params);
919855
@@ -927,7 +863,7 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
927863 }
928864
929865
930- for (int i = 0 ; i < sd_params-> batch_count ; i++) {
866+ for (int i = 0 ; i < params. batch_count ; i++) {
931867 if (results[i].data == NULL ) {
932868 continue ;
933869 }
0 commit comments