Skip to content

Commit ecf193a

Browse files
committed
rebase
1 parent bc1551b commit ecf193a

File tree

1 file changed

+26 −11 lines changed

1 file changed

+26 −11 lines changed

stable-diffusion.cpp

Lines changed: 26 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -145,7 +145,6 @@ class StableDiffusionGGML {
145145
#endif
146146
#ifdef SD_USE_METAL
147147
LOG_DEBUG("Using Metal backend");
148-
ggml_log_set(ggml_log_callback_default, nullptr);
149148
backend = ggml_backend_metal_init();
150149
#endif
151150
#ifdef SD_USE_VULKAN
@@ -192,6 +191,8 @@ class StableDiffusionGGML {
192191
rng = std::make_shared<PhiloxRNG>();
193192
}
194193

194+
ggml_log_set(ggml_log_callback_default, nullptr);
195+
195196
init_backend();
196197

197198
ModelLoader model_loader;
@@ -330,7 +331,7 @@ class StableDiffusionGGML {
330331
if (sd_version_is_dit(version)) {
331332
use_t5xxl = true;
332333
}
333-
if (!ggml_backend_is_cpu(backend) && use_t5xxl) {
334+
if (!clip_on_cpu && !ggml_backend_is_cpu(backend) && use_t5xxl) {
334335
LOG_WARN(
335336
"!!!It appears that you are using the T5 model. Some backends may encounter issues with it."
336337
"If you notice that the generated images are completely black,"
@@ -344,14 +345,12 @@ class StableDiffusionGGML {
344345
LOG_INFO("Using flash attention in the diffusion model");
345346
}
346347
if (sd_version_is_sd3(version)) {
347-
if (sd_ctx_params->diffusion_flash_attn) {
348-
LOG_WARN("flash attention in this diffusion model is currently unsupported!");
349-
}
350348
cond_stage_model = std::make_shared<SD3CLIPEmbedder>(clip_backend,
351349
offload_params_to_cpu,
352350
model_loader.tensor_storages_types);
353351
diffusion_model = std::make_shared<MMDiTModel>(backend,
354352
offload_params_to_cpu,
353+
sd_ctx_params->diffusion_flash_attn,
355354
model_loader.tensor_storages_types);
356355
} else if (sd_version_is_flux(version)) {
357356
bool is_chroma = false;
@@ -362,6 +361,15 @@ class StableDiffusionGGML {
362361
}
363362
}
364363
if (is_chroma) {
364+
if (sd_ctx_params->diffusion_flash_attn && sd_ctx_params->chroma_use_dit_mask) {
365+
LOG_WARN(
366+
"!!!It looks like you are using Chroma with flash attention. "
367+
"This is currently unsupported. "
368+
"If you find that the generated images are broken, "
369+
"try either disabling flash attention or specifying "
370+
"--chroma-disable-dit-mask as a workaround.");
371+
}
372+
365373
cond_stage_model = std::make_shared<T5CLIPEmbedder>(clip_backend,
366374
offload_params_to_cpu,
367375
model_loader.tensor_storages_types,
@@ -573,7 +581,7 @@ class StableDiffusionGGML {
573581
if (version == VERSION_SVD) {
574582
ignore_tensors.insert("conditioner.embedders.3");
575583
}
576-
bool success = model_loader.load_tensors(tensors, ignore_tensors, n_threads);
584+
bool success = model_loader.load_tensors(tensors, ignore_tensors);
577585
if (!success) {
578586
LOG_ERROR("load tensors from model loader failed");
579587
ggml_free(ctx);
@@ -744,6 +752,10 @@ class StableDiffusionGGML {
744752
denoiser->scheduler = std::make_shared<GITSSchedule>();
745753
denoiser->scheduler->version = version;
746754
break;
755+
case SMOOTHSTEP:
756+
LOG_INFO("Running with SmoothStep scheduler");
757+
denoiser->scheduler = std::make_shared<SmoothStepSchedule>();
758+
break;
747759
case DEFAULT:
748760
// Don't touch anything.
749761
break;
@@ -1544,6 +1556,7 @@ const char* schedule_to_str[] = {
15441556
"exponential",
15451557
"ays",
15461558
"gits",
1559+
"smoothstep",
15471560
};
15481561

15491562
const char* sd_schedule_name(enum scheduler_t scheduler) {
@@ -1563,7 +1576,7 @@ enum scheduler_t str_to_schedule(const char* str) {
15631576
}
15641577

15651578
void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params) {
1566-
memset((void*)sd_ctx_params, 0, sizeof(sd_ctx_params_t));
1579+
*sd_ctx_params = {};
15671580
sd_ctx_params->vae_decode_only = true;
15681581
sd_ctx_params->vae_tiling = false;
15691582
sd_ctx_params->free_params_immediately = true;
@@ -1647,6 +1660,7 @@ char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params) {
16471660
}
16481661

16491662
void sd_sample_params_init(sd_sample_params_t* sample_params) {
1663+
*sample_params = {};
16501664
sample_params->guidance.txt_cfg = 7.0f;
16511665
sample_params->guidance.img_cfg = INFINITY;
16521666
sample_params->guidance.distilled_guidance = 3.5f;
@@ -1693,9 +1707,9 @@ char* sd_sample_params_to_str(const sd_sample_params_t* sample_params) {
16931707
}
16941708

16951709
void sd_img_gen_params_init(sd_img_gen_params_t* sd_img_gen_params) {
1696-
memset((void*)sd_img_gen_params, 0, sizeof(sd_img_gen_params_t));
1697-
sd_img_gen_params->clip_skip = -1;
1710+
*sd_img_gen_params = {};
16981711
sd_sample_params_init(&sd_img_gen_params->sample_params);
1712+
sd_img_gen_params->clip_skip = -1;
16991713
sd_img_gen_params->ref_images_count = 0;
17001714
sd_img_gen_params->width = 512;
17011715
sd_img_gen_params->height = 512;
@@ -1752,7 +1766,7 @@ char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params) {
17521766
}
17531767

17541768
void sd_vid_gen_params_init(sd_vid_gen_params_t* sd_vid_gen_params) {
1755-
memset((void*)sd_vid_gen_params, 0, sizeof(sd_vid_gen_params_t));
1769+
*sd_vid_gen_params = {};
17561770
sd_sample_params_init(&sd_vid_gen_params->sample_params);
17571771
sd_sample_params_init(&sd_vid_gen_params->high_noise_sample_params);
17581772
sd_vid_gen_params->high_noise_sample_params.sample_steps = -1;
@@ -1776,6 +1790,7 @@ sd_ctx_t* new_sd_ctx(const sd_ctx_params_t* sd_ctx_params) {
17761790

17771791
sd_ctx->sd = new StableDiffusionGGML();
17781792
if (sd_ctx->sd == NULL) {
1793+
free(sd_ctx);
17791794
return NULL;
17801795
}
17811796

@@ -2378,7 +2393,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g
23782393
sd_img_gen_params->control_strength,
23792394
sd_img_gen_params->style_strength,
23802395
sd_img_gen_params->normalize_input,
2381-
sd_img_gen_params->input_id_images_path,
2396+
SAFE_STR(sd_img_gen_params->input_id_images_path),
23822397
ref_latents,
23832398
sd_img_gen_params->increase_ref_index,
23842399
concat_latent,

0 commit comments

Comments
 (0)