@@ -145,7 +145,6 @@ class StableDiffusionGGML {
145145#endif
146146#ifdef SD_USE_METAL
147147 LOG_DEBUG (" Using Metal backend" );
148- ggml_log_set (ggml_log_callback_default, nullptr );
149148 backend = ggml_backend_metal_init ();
150149#endif
151150#ifdef SD_USE_VULKAN
@@ -192,6 +191,8 @@ class StableDiffusionGGML {
192191 rng = std::make_shared<PhiloxRNG>();
193192 }
194193
194+ ggml_log_set (ggml_log_callback_default, nullptr );
195+
195196 init_backend ();
196197
197198 ModelLoader model_loader;
@@ -330,7 +331,7 @@ class StableDiffusionGGML {
330331 if (sd_version_is_dit (version)) {
331332 use_t5xxl = true ;
332333 }
333- if (!ggml_backend_is_cpu (backend) && use_t5xxl) {
334+ if (!clip_on_cpu && ! ggml_backend_is_cpu (backend) && use_t5xxl) {
334335 LOG_WARN (
335336 " !!!It appears that you are using the T5 model. Some backends may encounter issues with it."
336337 " If you notice that the generated images are completely black,"
@@ -344,14 +345,12 @@ class StableDiffusionGGML {
344345 LOG_INFO (" Using flash attention in the diffusion model" );
345346 }
346347 if (sd_version_is_sd3 (version)) {
347- if (sd_ctx_params->diffusion_flash_attn ) {
348- LOG_WARN (" flash attention in this diffusion model is currently unsupported!" );
349- }
350348 cond_stage_model = std::make_shared<SD3CLIPEmbedder>(clip_backend,
351349 offload_params_to_cpu,
352350 model_loader.tensor_storages_types );
353351 diffusion_model = std::make_shared<MMDiTModel>(backend,
354352 offload_params_to_cpu,
353+ sd_ctx_params->diffusion_flash_attn ,
355354 model_loader.tensor_storages_types );
356355 } else if (sd_version_is_flux (version)) {
357356 bool is_chroma = false ;
@@ -362,6 +361,15 @@ class StableDiffusionGGML {
362361 }
363362 }
364363 if (is_chroma) {
364+ if (sd_ctx_params->diffusion_flash_attn && sd_ctx_params->chroma_use_dit_mask ) {
365+ LOG_WARN (
366+ " !!!It looks like you are using Chroma with flash attention. "
367+ " This is currently unsupported. "
368+ " If you find that the generated images are broken, "
369+ " try either disabling flash attention or specifying "
370+ " --chroma-disable-dit-mask as a workaround." );
371+ }
372+
365373 cond_stage_model = std::make_shared<T5CLIPEmbedder>(clip_backend,
366374 offload_params_to_cpu,
367375 model_loader.tensor_storages_types ,
@@ -573,7 +581,7 @@ class StableDiffusionGGML {
573581 if (version == VERSION_SVD) {
574582 ignore_tensors.insert (" conditioner.embedders.3" );
575583 }
576- bool success = model_loader.load_tensors (tensors, ignore_tensors, n_threads );
584+ bool success = model_loader.load_tensors (tensors, ignore_tensors);
577585 if (!success) {
578586 LOG_ERROR (" load tensors from model loader failed" );
579587 ggml_free (ctx);
@@ -744,6 +752,10 @@ class StableDiffusionGGML {
744752 denoiser->scheduler = std::make_shared<GITSSchedule>();
745753 denoiser->scheduler ->version = version;
746754 break ;
755+ case SMOOTHSTEP:
756+ LOG_INFO (" Running with SmoothStep scheduler" );
757+ denoiser->scheduler = std::make_shared<SmoothStepSchedule>();
758+ break ;
747759 case DEFAULT:
748760 // Don't touch anything.
749761 break ;
@@ -1544,6 +1556,7 @@ const char* schedule_to_str[] = {
15441556 " exponential" ,
15451557 " ays" ,
15461558 " gits" ,
1559+ " smoothstep" ,
15471560};
15481561
15491562const char * sd_schedule_name (enum scheduler_t scheduler) {
@@ -1563,7 +1576,7 @@ enum scheduler_t str_to_schedule(const char* str) {
15631576}
15641577
15651578void sd_ctx_params_init (sd_ctx_params_t * sd_ctx_params) {
1566- memset (( void *) sd_ctx_params, 0 , sizeof ( sd_ctx_params_t )) ;
1579+ * sd_ctx_params = {} ;
15671580 sd_ctx_params->vae_decode_only = true ;
15681581 sd_ctx_params->vae_tiling = false ;
15691582 sd_ctx_params->free_params_immediately = true ;
@@ -1647,6 +1660,7 @@ char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params) {
16471660}
16481661
16491662void sd_sample_params_init (sd_sample_params_t * sample_params) {
1663+ *sample_params = {};
16501664 sample_params->guidance .txt_cfg = 7 .0f ;
16511665 sample_params->guidance .img_cfg = INFINITY;
16521666 sample_params->guidance .distilled_guidance = 3 .5f ;
@@ -1693,9 +1707,9 @@ char* sd_sample_params_to_str(const sd_sample_params_t* sample_params) {
16931707}
16941708
16951709void sd_img_gen_params_init (sd_img_gen_params_t * sd_img_gen_params) {
1696- memset ((void *)sd_img_gen_params, 0 , sizeof (sd_img_gen_params_t ));
1697- sd_img_gen_params->clip_skip = -1 ;
1710+ *sd_img_gen_params = {};
16981711 sd_sample_params_init (&sd_img_gen_params->sample_params );
1712+ sd_img_gen_params->clip_skip = -1 ;
16991713 sd_img_gen_params->ref_images_count = 0 ;
17001714 sd_img_gen_params->width = 512 ;
17011715 sd_img_gen_params->height = 512 ;
@@ -1752,7 +1766,7 @@ char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params) {
17521766}
17531767
17541768void sd_vid_gen_params_init (sd_vid_gen_params_t * sd_vid_gen_params) {
1755- memset (( void *) sd_vid_gen_params, 0 , sizeof ( sd_vid_gen_params_t )) ;
1769+ * sd_vid_gen_params = {} ;
17561770 sd_sample_params_init (&sd_vid_gen_params->sample_params );
17571771 sd_sample_params_init (&sd_vid_gen_params->high_noise_sample_params );
17581772 sd_vid_gen_params->high_noise_sample_params .sample_steps = -1 ;
@@ -1776,6 +1790,7 @@ sd_ctx_t* new_sd_ctx(const sd_ctx_params_t* sd_ctx_params) {
17761790
17771791 sd_ctx->sd = new StableDiffusionGGML ();
17781792 if (sd_ctx->sd == NULL ) {
1793+ free (sd_ctx);
17791794 return NULL ;
17801795 }
17811796
@@ -2378,7 +2393,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g
23782393 sd_img_gen_params->control_strength ,
23792394 sd_img_gen_params->style_strength ,
23802395 sd_img_gen_params->normalize_input ,
2381- sd_img_gen_params->input_id_images_path ,
2396+ SAFE_STR ( sd_img_gen_params->input_id_images_path ) ,
23822397 ref_latents,
23832398 sd_img_gen_params->increase_ref_index ,
23842399 concat_latent,
0 commit comments