Skip to content

Commit 9be9f9c

Browse files
committed
clean up default parameters in sdtype_adapter
Most image generation parameters are not customizable through the API, and their values simply repeat the defaults set by sd.cpp. So, remove them from the SDParams struct, and place the needed constant values directly in the code. Also, print debug information directly from the sd.cpp parameters. And to help with type mismatches and general readability, replace the printf-style formatting with stringstreams.
1 parent 8fd2a47 commit 9be9f9c

File tree

1 file changed

+68
-132
lines changed

1 file changed

+68
-132
lines changed

otherarch/sdcpp/sdtype_adapter.cpp

Lines changed: 68 additions & 132 deletions
Original file line numberDiff line numberDiff line change
@@ -40,81 +40,36 @@
4040
static_assert((int)SD_TYPE_COUNT == (int)GGML_TYPE_COUNT,
4141
"inconsistency between SD_TYPE_COUNT and GGML_TYPE_COUNT");
4242

43-
enum SDMode {
44-
TXT2IMG,
45-
IMG2IMG,
46-
IMG2VID,
47-
CONVERT,
48-
MODE_COUNT
49-
};
50-
5143
struct SDParams {
5244
int n_threads = -1;
53-
SDMode mode = TXT2IMG;
5445
std::string model_path;
5546
std::string clip_l_path;
5647
std::string clip_g_path;
5748
std::string t5xxl_path;
5849
std::string diffusion_model_path;
5950
std::string vae_path;
6051
std::string taesd_path;
61-
std::string esrgan_path;
62-
std::string controlnet_path;
63-
std::string embeddings_path;
6452
std::string stacked_id_embeddings_path;
65-
std::string input_id_images_path = "";
6653
sd_type_t wtype = SD_TYPE_COUNT;
67-
std::string lora_model_dir;
68-
std::string output_path = "output.png";
69-
std::string input_path;
70-
std::string mask_path;
71-
std::string control_image_path;
7254

7355
std::string prompt;
7456
std::string negative_prompt;
75-
float min_cfg = 1.0f;
7657
float cfg_scale = 7.0f;
77-
float guidance = 3.5f;
78-
float eta = 0.f;
79-
float style_ratio = 20.f;
8058
int clip_skip = -1; // <= 0 represents unspecified
8159
int width = 512;
8260
int height = 512;
83-
int batch_count = 1;
84-
85-
int video_frames = 6;
86-
int motion_bucket_id = 127;
87-
int fps = 6;
88-
float augmentation_level = 0.f;
8961

9062
sample_method_t sample_method = EULER_A;
91-
schedule_t schedule = DEFAULT;
9263
int sample_steps = 20;
9364
float strength = 0.75f;
94-
float control_strength = 0.9f;
95-
rng_type_t rng_type = CUDA_RNG;
9665
int64_t seed = 42;
97-
bool verbose = false;
98-
bool vae_tiling = false;
99-
bool control_net_cpu = false;
100-
bool normalize_input = false;
10166
bool clip_on_cpu = false;
10267
bool vae_on_cpu = false;
10368
bool diffusion_flash_attn = false;
10469
bool diffusion_conv_direct = false;
10570
bool vae_conv_direct = false;
106-
bool canny_preprocess = false;
107-
bool color = false;
108-
int upscale_repeats = 1;
109-
110-
std::vector<int> skip_layers = {7, 8, 9};
111-
float slg_scale = 0.f;
112-
float skip_layer_start = 0.01f;
113-
float skip_layer_end = 0.2f;
11471

11572
bool chroma_use_dit_mask = true;
116-
bool chroma_use_t5_mask = false;
117-
int chroma_t5_mask_pad = 1;
11873
};
11974

12075
//shared
@@ -264,8 +219,6 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
264219
sd_params->diffusion_flash_attn = inputs.flash_attention;
265220
sd_params->diffusion_conv_direct = inputs.diffusion_conv_direct;
266221
sd_params->vae_conv_direct = inputs.vae_conv_direct;
267-
sd_params->input_path = ""; //unused
268-
sd_params->batch_count = 1;
269222
sd_params->vae_path = vaefilename;
270223
sd_params->taesd_path = taesdpath;
271224
sd_params->t5xxl_path = t5xxl_filename;
@@ -289,62 +242,53 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
289242

290243
set_sd_log_level(sddebugmode);
291244

292-
bool vae_decode_only = false;
293-
bool free_param = false;
294-
if(inputs.debugmode==1)
295-
{
296-
printf("\nMODEL:%s\nVAE:%s\nTAESD:%s\nCNET:%s\nLORA:%s\nEMBD:%s\nVAE_DEC:%d\nVAE_TILE:%d\nFREE_PARAM:%d\nTHREADS:%d\nWTYPE:%d\nRNGTYPE:%d\nSCHED:%d\nCNETCPU:%d\n\n",
297-
sd_params->model_path.c_str(),
298-
sd_params->vae_path.c_str(),
299-
sd_params->taesd_path.c_str(),
300-
sd_params->controlnet_path.c_str(),
301-
sd_params->lora_model_dir.c_str(),
302-
sd_params->embeddings_path.c_str(),
303-
vae_decode_only,
304-
sd_params->vae_tiling,
305-
free_param,
306-
sd_params->n_threads,
307-
sd_params->wtype,
308-
sd_params->rng_type,
309-
sd_params->schedule,
310-
sd_params->control_net_cpu);
311-
}
312-
313-
sd_ctx_params_t params;
245+
sd_ctx_params_t params = {};
314246
sd_ctx_params_init(&params);
247+
315248
params.model_path = sd_params->model_path.c_str();
316249
params.clip_l_path = sd_params->clip_l_path.c_str();
317250
params.clip_g_path = sd_params->clip_g_path.c_str();
318251
params.t5xxl_path = sd_params->t5xxl_path.c_str();
319252
params.diffusion_model_path = sd_params->diffusion_model_path.c_str();
320253
params.vae_path = sd_params->vae_path.c_str();
321254
params.taesd_path = sd_params->taesd_path.c_str();
322-
params.control_net_path = sd_params->controlnet_path.c_str();
323-
params.lora_model_dir = sd_params->lora_model_dir.c_str();
324-
params.embedding_dir = sd_params->embeddings_path.c_str();
325255
params.stacked_id_embed_dir = sd_params->stacked_id_embeddings_path.c_str();
326-
params.vae_decode_only = vae_decode_only;
327-
params.vae_tiling = sd_params->vae_tiling;
328-
params.free_params_immediately = free_param;
256+
257+
params.vae_decode_only = false;
258+
params.vae_tiling = false;
259+
params.free_params_immediately = false;
260+
params.rng_type = CUDA_RNG;
261+
329262
params.n_threads = sd_params->n_threads;
330263
params.wtype = sd_params->wtype;
331-
params.rng_type = sd_params->rng_type;
332-
params.schedule = sd_params->schedule;
333264
params.keep_clip_on_cpu = sd_params->clip_on_cpu;
334-
params.keep_control_net_on_cpu = sd_params->control_net_cpu;
335-
params.keep_vae_on_cpu = sd_params->vae_on_cpu;
336265
params.diffusion_flash_attn = sd_params->diffusion_flash_attn;
337266
params.diffusion_conv_direct = sd_params->diffusion_conv_direct;
338267
params.vae_conv_direct = sd_params->vae_conv_direct;
339268
params.chroma_use_dit_mask = sd_params->chroma_use_dit_mask;
340-
params.chroma_use_t5_mask = sd_params->chroma_use_t5_mask;
341-
params.chroma_t5_mask_pad = sd_params->chroma_t5_mask_pad;
342269

343270
if (params.chroma_use_dit_mask && params.diffusion_flash_attn) {
344271
// note we don't know yet if it's a Chroma model
345272
params.chroma_use_dit_mask = false;
346273
}
347274

275+
if(inputs.debugmode==1)
276+
{
277+
std::stringstream ss;
278+
ss << "\nMODEL:" << params.model_path
279+
<< "\nDIFFUSION:" << params.diffusion_model_path
280+
<< "\nVAE:" << params.vae_path
281+
<< "\nTAESD:" << params.taesd_path
282+
<< "\nPHOTOMAKER:" << params.stacked_id_embed_dir
283+
<< "\nTHREADS:" << params.n_threads
284+
<< "\nWTYPE:" << params.wtype
285+
<< "\nDIFFUSIONFLASHATTN:" << (params.diffusion_flash_attn ? 1 : 0)
286+
<< "\nDIFFUSIONCONVDIRECT:" << (params.diffusion_conv_direct ? 1 : 0)
287+
<< "\nVAECONVDIRECT:" << (params.vae_conv_direct ? 1 : 0)
288+
<< "\n";
289+
printf("%s", ss.str().c_str());
290+
}
291+
348292
sd_ctx = new_sd_ctx(&params);
349293

350294
if (sd_ctx == NULL) {
@@ -551,7 +495,6 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
551495
return output;
552496
}
553497
sd_image_t * results;
554-
sd_image_t* control_image = NULL;
555498

556499
//sanitize prompts, remove quotes and limit lengths
557500
std::string cleanprompt = clean_input_prompt(inputs.prompt);
@@ -574,7 +517,8 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
574517
sd_params->strength = inputs.denoising_strength;
575518
sd_params->clip_skip = inputs.clip_skip;
576519
sd_params->sample_method = sampler_from_name(inputs.sample_method);
577-
sd_params->mode = (img2img_data==""?SDMode::TXT2IMG:SDMode::IMG2IMG);
520+
521+
bool is_img2img = img2img_data != "";
578522

579523
auto loadedsdver = get_loaded_sd_version(sd_ctx);
580524
if (loadedsdver == SDVersion::VERSION_FLUX)
@@ -589,7 +533,7 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
589533
if (sd_params->sample_method == sample_method_t::EULER_A) {
590534
//euler a broken on flux
591535
if (!sd_is_quiet && sddebugmode) {
592-
printf("Flux: switching Euler A to Euler\n");
536+
printf("%s: switching Euler A to Euler\n", loaded_model_is_chroma(sd_ctx) ? "Chroma" : "Flux");
593537
}
594538
sd_params->sample_method = sample_method_t::EULER;
595539
}
@@ -619,7 +563,8 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
619563

620564
sd_fix_resolution(sd_params->width, sd_params->height, img_hard_limit, img_soft_limit);
621565
if (inputs.width != sd_params->width || inputs.height != sd_params->height) {
622-
printf("\nKCPP SD: Requested dimensions %dx%d changed to %dx%d\n", inputs.width, inputs.height, sd_params->width, sd_params->height);
566+
printf("\nKCPP SD: Requested dimensions %dx%d changed to %dx%d\n",
567+
inputs.width, inputs.height, sd_params->width, sd_params->height);
623568
}
624569

625570
// trigger tiling by image area, the memory used for the VAE buffer is 6656 bytes per image pixel, default 768x768
@@ -750,33 +695,22 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
750695
}
751696
}
752697

753-
sd_img_gen_params_t params;
698+
sd_img_gen_params_t params = {};
754699
sd_img_gen_params_init (&params);
755700

756701
params.prompt = sd_params->prompt.c_str();
757702
params.negative_prompt = sd_params->negative_prompt.c_str();
758703
params.clip_skip = sd_params->clip_skip;
759704
params.guidance.txt_cfg = sd_params->cfg_scale;
760705
params.guidance.img_cfg = sd_params->cfg_scale;
761-
params.guidance.distilled_guidance = sd_params->guidance;
762-
params.eta = sd_params->eta;
763706
params.width = sd_params->width;
764707
params.height = sd_params->height;
765708
params.sample_method = sd_params->sample_method;
766709
params.sample_steps = sd_params->sample_steps;
767710
params.seed = sd_params->seed;
768-
params.batch_count = sd_params->batch_count;
769-
params.control_cond = control_image;
770-
params.control_strength = sd_params->control_strength;
771-
params.style_strength = sd_params->style_ratio;
772-
params.normalize_input = sd_params->normalize_input;
773-
params.input_id_images_path = sd_params->input_id_images_path.c_str();
774-
775-
params.guidance.slg.layers = sd_params->skip_layers.data();
776-
params.guidance.slg.layer_count = sd_params->skip_layers.size();
777-
params.guidance.slg.layer_start = sd_params->skip_layer_start;
778-
params.guidance.slg.layer_end = sd_params->skip_layer_end;
779-
params.guidance.slg.scale = sd_params->slg_scale;
711+
params.strength = sd_params->strength;
712+
params.batch_count = 1;
713+
params.input_id_images_path = "";
780714

781715
params.ref_images = reference_imgs.data();
782716
params.ref_images_count = reference_imgs.size();
@@ -785,30 +719,31 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
785719
extra_params.photomaker_references = photomaker_imgs.data();
786720
extra_params.photomaker_reference_count = photomaker_imgs.size();
787721

788-
if (sd_params->mode == TXT2IMG) {
722+
if (!is_img2img) {
789723

790724
if(!sd_is_quiet && sddebugmode==1)
791725
{
792-
printf("\nTXT2IMG PROMPT:%s\nNPROMPT:%s\nCLPSKP:%d\nCFGSCLE:%f\nW:%d\nH:%d\nSM:%d\nSTEP:%d\nSEED:%d\nBATCH:%d\nCIMG:%p\nCSTR:%f\n\n",
793-
sd_params->prompt.c_str(),
794-
sd_params->negative_prompt.c_str(),
795-
sd_params->clip_skip,
796-
sd_params->cfg_scale,
797-
sd_params->width,
798-
sd_params->height,
799-
sd_params->sample_method,
800-
sd_params->sample_steps,
801-
(int)sd_params->seed,
802-
sd_params->batch_count,
803-
control_image,
804-
sd_params->control_strength);
726+
std::stringstream ss;
727+
ss << "\nTXT2IMG PROMPT:" << params.prompt
728+
<< "\nNPROMPT:" << params.negative_prompt
729+
<< "\nCLPSKP:" << params.clip_skip
730+
<< "\nCFGSCLE:" << params.guidance.txt_cfg
731+
<< "\nSIZE:" << params.width << "x" << params.height
732+
<< "\nSM:" << sd_sample_method_name(params.sample_method)
733+
<< "\nSTEP:" << params.sample_steps
734+
<< "\nSEED:" << params.seed
735+
<< "\nBATCH:" << params.batch_count
736+
<< "\n\n";
737+
printf("%s", ss.str().c_str());
805738
}
806739

740+
fflush(stdout);
741+
807742
results = generate_image(sd_ctx, &params, &extra_params);
808743

809744
} else {
810745

811-
if (sd_params->width <= 0 || sd_params->width % 64 != 0 || sd_params->height <= 0 || sd_params->height % 64 != 0) {
746+
if (params.width <= 0 || params.width % 64 != 0 || params.height <= 0 || params.height % 64 != 0) {
812747
printf("\nKCPP SD: bad request image dimensions!\n");
813748
output.data = "";
814749
output.status = 0;
@@ -894,26 +829,27 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
894829
}
895830
sd_image_t mask_image = { (uint32_t) img2imgW, (uint32_t) img2imgH, 1, mask_image_buffer };
896831

832+
params.init_image = input_image;
833+
params.mask_image = mask_image;
834+
897835
if(!sd_is_quiet && sddebugmode==1)
898836
{
899-
printf("\nIMG2IMG PROMPT:%s\nNPROMPT:%s\nCLPSKP:%d\nCFGSCLE:%f\nW:%d\nH:%d\nSM:%d\nSTEP:%d\nSEED:%d\nBATCH:%d\nCIMG:%p\nSTR:%f\n\n",
900-
sd_params->prompt.c_str(),
901-
sd_params->negative_prompt.c_str(),
902-
sd_params->clip_skip,
903-
sd_params->cfg_scale,
904-
sd_params->width,
905-
sd_params->height,
906-
sd_params->sample_method,
907-
sd_params->sample_steps,
908-
(int)sd_params->seed,
909-
sd_params->batch_count,
910-
control_image,
911-
sd_params->strength);
837+
std::stringstream ss;
838+
ss << "\nnIMG2IMG PROMPT:" << params.prompt
839+
<< "\nNPROMPT:" << params.negative_prompt
840+
<< "\nCLPSKP:" << params.clip_skip
841+
<< "\nCFGSCLE:" << params.guidance.txt_cfg
842+
<< "\nSIZE:" << params.width << "x" << params.height
843+
<< "\nSM:" << sd_sample_method_name(params.sample_method)
844+
<< "\nSTEP:" << params.sample_steps
845+
<< "\nSEED:" << params.seed
846+
<< "\nSTRENGTH:" << params.strength
847+
<< "\nBATCH:" << params.batch_count
848+
<< "\n\n";
849+
printf("%s", ss.str().c_str());
912850
}
913851

914-
params.strength = sd_params->strength;
915-
params.init_image = input_image;
916-
params.mask_image = mask_image;
852+
fflush(stdout);
917853

918854
results = generate_image(sd_ctx, &params, &extra_params);
919855

@@ -927,7 +863,7 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
927863
}
928864

929865

930-
for (int i = 0; i < sd_params->batch_count; i++) {
866+
for (int i = 0; i < params.batch_count; i++) {
931867
if (results[i].data == NULL) {
932868
continue;
933869
}

0 commit comments

Comments
 (0)