From 9c7cb44f24a1656bcff88f7727f9b728e721c30b Mon Sep 17 00:00:00 2001 From: Wagner Bruna Date: Thu, 14 Aug 2025 08:16:52 -0300 Subject: [PATCH 1/4] fix: avoid segfault for pix2pix models with no reference images --- stable-diffusion.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index db4e07cb0..21460b0e8 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -2322,6 +2322,10 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g if (sd_img_gen_params->ref_images_count > 0) { LOG_INFO("EDIT mode"); } + else if (sd_ctx->sd->version == VERSION_SD1_PIX2PIX || sd_ctx->sd->version == VERSION_SDXL_PIX2PIX) { + LOG_ERROR("This model needs at least one reference image"); + return NULL; + } std::vector ref_latents; for (int i = 0; i < sd_img_gen_params->ref_images_count; i++) { From 9dae12fd4ea01e8801e4914dce02ff8a10711e26 Mon Sep 17 00:00:00 2001 From: Wagner Bruna Date: Fri, 15 Aug 2025 16:42:40 -0300 Subject: [PATCH 2/4] fix: default to empty reference on pix2pix models to avoid segfault --- stable-diffusion.cpp | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index 21460b0e8..ba741bfac 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -448,6 +448,10 @@ class StableDiffusionGGML { diffusion_model->alloc_params_buffer(); diffusion_model->get_param_tensors(tensors); + if (sd_version_is_unet_edit(version)) { + vae_decode_only = false; + } + if (high_noise_diffusion_model) { high_noise_diffusion_model->alloc_params_buffer(); high_noise_diffusion_model->get_param_tensors(tensors); @@ -2319,23 +2323,36 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g init_latent = generate_init_latent(sd_ctx, work_ctx, width, height); } + sd_guidance_params_t guidance = sd_img_gen_params->sample_params.guidance; + std::vector ref_images; + for (int i = 0; i < sd_img_gen_params->ref_images_count; i++) { + ref_images.push_back(&sd_img_gen_params->ref_images[i]); + } + + std::vector empty_image_data; + sd_image_t empty_image = {(uint32_t)width, (uint32_t)height, 3, nullptr}; + if (ref_images.empty() && sd_version_is_unet_edit(sd_ctx->sd->version)) + { + LOG_WARN("This model needs at least one reference image; using an empty reference"); + empty_image_data.reserve(width * height * 3); + ref_images.push_back(&empty_image); + empty_image.data = empty_image_data.data(); + guidance.img_cfg = 0.f; + } + if (sd_img_gen_params->ref_images_count > 0) { LOG_INFO("EDIT mode"); } - else if (sd_ctx->sd->version == VERSION_SD1_PIX2PIX || sd_ctx->sd->version == VERSION_SDXL_PIX2PIX) { - LOG_ERROR("This model needs at least one reference image"); - return NULL; - } std::vector ref_latents; - for (int i = 0; i < sd_img_gen_params->ref_images_count; i++) { + for (int i = 0; i < ref_images.size(); i++) { ggml_tensor* img = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, - sd_img_gen_params->ref_images[i].width, - sd_img_gen_params->ref_images[i].height, + ref_images[i]->width, + ref_images[i]->height, 3, 1); - sd_image_to_tensor(sd_img_gen_params->ref_images[i].data, img); + sd_image_to_tensor(ref_images[i]->data, img); ggml_tensor* latent = NULL; if (sd_ctx->sd->use_tiny_autoencoder) { @@ -2368,7 +2385,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g SAFE_STR(sd_img_gen_params->prompt), SAFE_STR(sd_img_gen_params->negative_prompt), sd_img_gen_params->clip_skip, - sd_img_gen_params->sample_params.guidance, + guidance, sd_img_gen_params->sample_params.eta, width, height, From 6567d97265eaffb3b17917ea2256695d5fb27ecc Mon Sep 17 00:00:00 2001 From: leejet Date: Thu, 18 Sep 2025 00:03:08 +0800 Subject: [PATCH 3/4] use resize instead of reserve --- stable-diffusion.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index ba741bfac..b5ab97981 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -2334,7 +2334,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g if (ref_images.empty() && sd_version_is_unet_edit(sd_ctx->sd->version)) { LOG_WARN("This model needs at least one reference image; using an empty reference"); - empty_image_data.reserve(width * height * 3); + empty_image_data.resize(width * height * 3); ref_images.push_back(&empty_image); empty_image.data = empty_image_data.data(); guidance.img_cfg = 0.f; From 956c65423620a2270af2ea973eaa66c48c60704e Mon Sep 17 00:00:00 2001 From: leejet Date: Thu, 18 Sep 2025 00:08:56 +0800 Subject: [PATCH 4/4] format code --- stable-diffusion.cpp | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index 92a6c467c..e4102e60d 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -752,15 +752,15 @@ class StableDiffusionGGML { denoiser->scheduler->version = version; break; case SGM_UNIFORM: - LOG_INFO("Running with SGM Uniform schedule"); - denoiser->scheduler = std::make_shared(); - denoiser->scheduler->version = version; - break; + LOG_INFO("Running with SGM Uniform schedule"); + denoiser->scheduler = std::make_shared(); + denoiser->scheduler->version = version; + break; case SIMPLE: - LOG_INFO("Running with Simple schedule"); - denoiser->scheduler = std::make_shared(); - denoiser->scheduler->version = version; - break; + LOG_INFO("Running with Simple schedule"); + denoiser->scheduler = std::make_shared(); + denoiser->scheduler->version = version; + break; case SMOOTHSTEP: LOG_INFO("Running with SmoothStep scheduler"); denoiser->scheduler = std::make_shared(); @@ -1057,7 +1057,7 @@ class StableDiffusionGGML { ggml_tensor* denoise_mask = NULL, ggml_tensor* vace_context = NULL, float vace_strength = 1.f) { - if (shifted_timestep > 0 && !sd_version_is_sdxl(version)) { + if (shifted_timestep > 0 && !sd_version_is_sdxl(version)) { LOG_WARN("timestep shifting is only supported for SDXL models!"); shifted_timestep = 0; } @@ -1131,7 +1131,7 @@ class StableDiffusionGGML { } else { timesteps_vec.assign(1, t); } - + timesteps_vec = process_timesteps(timesteps_vec, init_latent, denoise_mask); auto timesteps = vector_to_ggml_tensor(work_ctx, timesteps_vec); std::vector guidance_vec(1, guidance.distilled_guidance); @@ -2392,9 +2392,6 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g } sd_guidance_params_t guidance = sd_img_gen_params->sample_params.guidance; - if (sd_img_gen_params->ref_images_count > 0) { - LOG_INFO("EDIT mode"); - } std::vector ref_images; for (int i = 0; i < sd_img_gen_params->ref_images_count; i++) { ref_images.push_back(&sd_img_gen_params->ref_images[i]); @@ -2402,8 +2399,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g std::vector empty_image_data; sd_image_t empty_image = {(uint32_t)width, (uint32_t)height, 3, nullptr}; - if (ref_images.empty() && sd_version_is_unet_edit(sd_ctx->sd->version)) - { + if (ref_images.empty() && sd_version_is_unet_edit(sd_ctx->sd->version)) { LOG_WARN("This model needs at least one reference image; using an empty reference"); empty_image_data.resize(width * height * 3); ref_images.push_back(&empty_image); @@ -2411,6 +2407,10 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g guidance.img_cfg = 0.f; } + if (ref_images.size() > 0) { + LOG_INFO("EDIT mode"); + } + std::vector ref_latents; for (int i = 0; i < ref_images.size(); i++) { ggml_tensor* img = ggml_new_tensor_4d(work_ctx,