From 72ad845f4a17a42ee1dd68ba4669be678acb9fcd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20du=20Hamel?= Date: Wed, 30 Oct 2024 19:35:10 +0100 Subject: [PATCH 1/7] fast latent image preview --- examples/cli/main.cpp | 122 +++++++++++++++++++++++++++++++++++++++++- stable-diffusion.cpp | 18 +++++-- stable-diffusion.h | 5 +- 3 files changed, 138 insertions(+), 7 deletions(-) diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp index 4b47286f4..527dd6273 100644 --- a/examples/cli/main.cpp +++ b/examples/cli/main.cpp @@ -765,6 +765,125 @@ void sd_log_cb(enum sd_log_level_t level, const char* log, void* data) { fflush(out_stream); } +// https://github.com/comfyanonymous/ComfyUI/blob/master/comfy/latent_formats.py#L152-L169 +const float flux_latent_rgb_proj[16][3] = { + {-0.0346, 0.0244, 0.0681}, + {0.0034, 0.0210, 0.0687}, + {0.0275, -0.0668, -0.0433}, + {-0.0174, 0.0160, 0.0617}, + {0.0859, 0.0721, 0.0329}, + {0.0004, 0.0383, 0.0115}, + {0.0405, 0.0861, 0.0915}, + {-0.0236, -0.0185, -0.0259}, + {-0.0245, 0.0250, 0.1180}, + {0.1008, 0.0755, -0.0421}, + {-0.0515, 0.0201, 0.0011}, + {0.0428, -0.0012, -0.0036}, + {0.0817, 0.0765, 0.0749}, + {-0.1264, -0.0522, -0.1103}, + {-0.0280, -0.0881, -0.0499}, + {-0.1262, -0.0982, -0.0778}}; + +// https://github.com/Stability-AI/sd3.5/blob/main/sd3_impls.py#L228-L246 +const float sd3_latent_rgb_proj[16][3] = { + {-0.0645, 0.0177, 0.1052}, + {0.0028, 0.0312, 0.0650}, + {0.1848, 0.0762, 0.0360}, + {0.0944, 0.0360, 0.0889}, + {0.0897, 0.0506, -0.0364}, + {-0.0020, 0.1203, 0.0284}, + {0.0855, 0.0118, 0.0283}, + {-0.0539, 0.0658, 0.1047}, + {-0.0057, 0.0116, 0.0700}, + {-0.0412, 0.0281, -0.0039}, + {0.1106, 0.1171, 0.1220}, + {-0.0248, 0.0682, -0.0481}, + {0.0815, 0.0846, 0.1207}, + {-0.0120, -0.0055, -0.0867}, + {-0.0749, -0.0634, -0.0456}, + {-0.1418, -0.1457, -0.1259}, +}; + +// https://github.com/comfyanonymous/ComfyUI/blob/master/comfy/latent_formats.py#L32-L38 +const float sdxl_latent_rgb_proj[4][3] = { + {0.3651, 0.4232, 0.4341}, + {-0.2533, -0.0042, 0.1068}, + {0.1076, 0.1111, -0.0362}, + {-0.3165, -0.2492, -0.2188}}; + +// https://github.com/comfyanonymous/ComfyUI/blob/master/comfy/latent_formats.py#L32-L38 +const float sd_latent_rgb_proj[4][3]{ + {0.3512, 0.2297, 0.3227}, + {0.3250, 0.4974, 0.2350}, + {-0.2829, 0.1762, 0.2721}, + {-0.2120, -0.2616, -0.7177}}; + +void step_callback(int step, struct ggml_tensor* latents, enum SDVersion version) { + const int channel = 3; + int width = latents->ne[0]; + int height = latents->ne[1]; + int dim = latents->ne[2]; + + const float(*latent_rgb_proj)[channel]; + + if (dim == 16) { + // 16 channels VAE -> Flux or SD3 + + if (version == VERSION_SD3_2B || version == VERSION_SD3_5_8B /* || version == VERSION_SD3_5_2B*/) { + latent_rgb_proj = sd3_latent_rgb_proj; + } else if (version == VERSION_FLUX_DEV || version == VERSION_FLUX_SCHNELL) { + latent_rgb_proj = flux_latent_rgb_proj; + } else { + // unknown model + return; + } + + } else if (dim == 4) { + // 4 channels VAE + if (version == VERSION_SDXL) { + latent_rgb_proj = sdxl_latent_rgb_proj; + } else if (version == VERSION_SD1 || version == VERSION_SD2) { + latent_rgb_proj = sd_latent_rgb_proj; + } else { + // unknown model + return; + } + } else { + // unknown latent space + return; + } + uint8_t* data = (uint8_t*)malloc(width * height * channel * sizeof(uint8_t)); + int data_head = 0; + for (int j = 0; j < height; j++) { + for (int i = 0; i < width; i++) { + int latent_id = (i * latents->nb[0] + j * latents->nb[1]); + float r = 0, g = 0, b = 0; + for (int d = 0; d < dim; d++) { + float value = *(float*)((char*)latents->data + latent_id + d * latents->nb[2]); + r += value * latent_rgb_proj[d][0]; + g += value * latent_rgb_proj[d][1]; + b += value * latent_rgb_proj[d][2]; + } + + // change range + r = r * .5 + .5; + g = g * .5 + .5; + b = b * .5 + .5; + + // clamp rgb values to [0,1] range + r = r >= 0 ? r <= 1 ? r : 1 : 0; + g = g >= 0 ? g <= 1 ? g : 1 : 0; + b = b >= 0 ? b <= 1 ? b : 1 : 0; + + data[data_head++] = (uint8_t)(r * 255.); + data[data_head++] = (uint8_t)(g * 255.); + data[data_head++] = (uint8_t)(b * 255.); + } + } + stbi_write_png("latent-preview.png", width, height, channel, data, 0); + free(data); +} + int main(int argc, const char* argv[]) { SDParams params; @@ -930,7 +1049,8 @@ int main(int argc, const char* argv[]) { params.skip_layers.size(), params.slg_scale, params.skip_layer_start, - params.skip_layer_end); + params.skip_layer_end, + step_callback); } else { sd_image_t input_image = {(uint32_t)params.width, (uint32_t)params.height, diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index 5abc29507..f44db931f 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -784,7 +784,8 @@ class StableDiffusionGGML { std::vector skip_layers = {}, float slg_scale = 0, float skip_layer_start = 0.01, - float skip_layer_end = 0.2) { + float skip_layer_end = 0.2, + std::function step_callback = nullptr) { size_t steps = sigmas.size() - 1; // noise = load_tensor_from_file(work_ctx, "./rand0.bin"); // print_ggml_tensor(noise); @@ -943,6 +944,9 @@ class StableDiffusionGGML { pretty_progress(step, (int)steps, (t1 - t0) / 1000000.f); // LOG_INFO("step %d sampling completed taking %.2fs", step, (t1 - t0) * 1.0f / 1000000); } + if (step_callback != nullptr) { + step_callback(step, denoised, version); + } return denoised; }; @@ -1166,7 +1170,8 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, std::vector skip_layers = {}, float slg_scale = 0, float skip_layer_start = 0.01, - float skip_layer_end = 0.2) { + float skip_layer_end = 0.2, + std::function step_callback = nullptr) { if (seed < 0) { // Generally, when using the provided command line, the seed is always >0. // However, to prevent potential issues if 'stable-diffusion.cpp' is invoked as a library @@ -1388,7 +1393,8 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, skip_layers, slg_scale, skip_layer_start, - skip_layer_end); + skip_layer_end, + step_callback); // struct ggml_tensor* x_0 = load_tensor_from_file(ctx, "samples_ddim.bin"); // print_ggml_tensor(x_0); int64_t sampling_end = ggml_time_ms(); @@ -1459,7 +1465,8 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx, size_t skip_layers_count = 0, float slg_scale = 0, float skip_layer_start = 0.01, - float skip_layer_end = 0.2) { + float skip_layer_end = 0.2, + step_callback_t step_callback) { std::vector skip_layers_vec(skip_layers, skip_layers + skip_layers_count); LOG_DEBUG("txt2img %dx%d", width, height); if (sd_ctx == NULL) { @@ -1532,7 +1539,8 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx, skip_layers_vec, slg_scale, skip_layer_start, - skip_layer_end); + skip_layer_end, + step_callback); size_t t1 = ggml_time_ms(); diff --git a/stable-diffusion.h b/stable-diffusion.h index c67bc8a32..56c69ffec 100644 --- a/stable-diffusion.h +++ b/stable-diffusion.h @@ -149,6 +149,8 @@ SD_API sd_ctx_t* new_sd_ctx(const char* model_path, SD_API void free_sd_ctx(sd_ctx_t* sd_ctx); +typedef void (*step_callback_t)(int, struct ggml_tensor*, enum SDVersion); + SD_API sd_image_t* txt2img(sd_ctx_t* sd_ctx, const char* prompt, const char* negative_prompt, @@ -170,7 +172,8 @@ SD_API sd_image_t* txt2img(sd_ctx_t* sd_ctx, size_t skip_layers_count, float slg_scale, float skip_layer_start, - float skip_layer_end); + float skip_layer_end, + step_callback_t step_callback = NULL); SD_API sd_image_t* img2img(sd_ctx_t* sd_ctx, sd_image_t init_image, From 5aa5b60628316607ed881b1884b171e61d8b4dc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20du=20Hamel?= Date: Wed, 30 Oct 2024 20:18:38 +0100 Subject: [PATCH 2/7] fix posix compile --- examples/cli/main.cpp | 2 +- stable-diffusion.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp index 527dd6273..1448e13f3 100644 --- a/examples/cli/main.cpp +++ b/examples/cli/main.cpp @@ -1050,7 +1050,7 @@ int main(int argc, const char* argv[]) { params.slg_scale, params.skip_layer_start, params.skip_layer_end, - step_callback); + (step_callback_t)step_callback); } else { sd_image_t input_image = {(uint32_t)params.width, (uint32_t)params.height, diff --git a/stable-diffusion.h b/stable-diffusion.h index 56c69ffec..262dc80f5 100644 --- a/stable-diffusion.h +++ b/stable-diffusion.h @@ -149,7 +149,7 @@ SD_API sd_ctx_t* new_sd_ctx(const char* model_path, SD_API void free_sd_ctx(sd_ctx_t* sd_ctx); -typedef void (*step_callback_t)(int, struct ggml_tensor*, enum SDVersion); +typedef void (*step_callback_t)(int, struct ggml_tensor*, int); SD_API sd_image_t* txt2img(sd_ctx_t* sd_ctx, const char* prompt, From 2515dabe835e1d89bf85497d5f648fb4527730cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20du=20Hamel?= Date: Sun, 24 Nov 2024 19:07:52 +0100 Subject: [PATCH 3/7] preview: use new helper functions --- examples/cli/main.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp index 1448e13f3..65479fcd0 100644 --- a/examples/cli/main.cpp +++ b/examples/cli/main.cpp @@ -829,9 +829,9 @@ void step_callback(int step, struct ggml_tensor* latents, enum SDVersion version if (dim == 16) { // 16 channels VAE -> Flux or SD3 - if (version == VERSION_SD3_2B || version == VERSION_SD3_5_8B /* || version == VERSION_SD3_5_2B*/) { + if (sd_version_is_sd3(version)) { latent_rgb_proj = sd3_latent_rgb_proj; - } else if (version == VERSION_FLUX_DEV || version == VERSION_FLUX_SCHNELL) { + } else if (sd_version_is_flux(version)) { latent_rgb_proj = flux_latent_rgb_proj; } else { // unknown model From ce4f97608a88b6ce13092a9d24f20b1be19f25dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20du=20Hamel?= Date: Mon, 25 Nov 2024 00:28:35 +0100 Subject: [PATCH 4/7] move latent preview code to a separate file --- examples/cli/main.cpp | 83 +++---------------------------------------- latent-preview.h | 83 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+), 79 deletions(-) create mode 100644 latent-preview.h diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp index 65479fcd0..5e1e747d1 100644 --- a/examples/cli/main.cpp +++ b/examples/cli/main.cpp @@ -10,6 +10,8 @@ #include "flux.hpp" #include "stable-diffusion.h" +#include "latent-preview.h" + #define STB_IMAGE_IMPLEMENTATION #define STB_IMAGE_STATIC #include "stb_image.h" @@ -765,59 +767,6 @@ void sd_log_cb(enum sd_log_level_t level, const char* log, void* data) { fflush(out_stream); } -// https://github.com/comfyanonymous/ComfyUI/blob/master/comfy/latent_formats.py#L152-L169 -const float flux_latent_rgb_proj[16][3] = { - {-0.0346, 0.0244, 0.0681}, - {0.0034, 0.0210, 0.0687}, - {0.0275, -0.0668, -0.0433}, - {-0.0174, 0.0160, 0.0617}, - {0.0859, 0.0721, 0.0329}, - {0.0004, 0.0383, 0.0115}, - {0.0405, 0.0861, 0.0915}, - {-0.0236, -0.0185, -0.0259}, - {-0.0245, 0.0250, 0.1180}, - {0.1008, 0.0755, -0.0421}, - {-0.0515, 0.0201, 0.0011}, - {0.0428, -0.0012, -0.0036}, - {0.0817, 0.0765, 0.0749}, - {-0.1264, -0.0522, -0.1103}, - {-0.0280, -0.0881, -0.0499}, - {-0.1262, -0.0982, -0.0778}}; - -// https://github.com/Stability-AI/sd3.5/blob/main/sd3_impls.py#L228-L246 -const float sd3_latent_rgb_proj[16][3] = { - {-0.0645, 0.0177, 0.1052}, - {0.0028, 0.0312, 0.0650}, - {0.1848, 0.0762, 0.0360}, - {0.0944, 0.0360, 0.0889}, - {0.0897, 0.0506, -0.0364}, - {-0.0020, 0.1203, 0.0284}, - {0.0855, 0.0118, 0.0283}, - {-0.0539, 0.0658, 0.1047}, - {-0.0057, 0.0116, 0.0700}, - {-0.0412, 0.0281, -0.0039}, - {0.1106, 0.1171, 0.1220}, - {-0.0248, 0.0682, -0.0481}, - {0.0815, 0.0846, 0.1207}, - {-0.0120, -0.0055, -0.0867}, - {-0.0749, -0.0634, -0.0456}, - {-0.1418, -0.1457, -0.1259}, -}; - -// https://github.com/comfyanonymous/ComfyUI/blob/master/comfy/latent_formats.py#L32-L38 -const float sdxl_latent_rgb_proj[4][3] = { - {0.3651, 0.4232, 0.4341}, - {-0.2533, -0.0042, 0.1068}, - {0.1076, 0.1111, -0.0362}, - {-0.3165, -0.2492, -0.2188}}; - -// https://github.com/comfyanonymous/ComfyUI/blob/master/comfy/latent_formats.py#L32-L38 -const float sd_latent_rgb_proj[4][3]{ - {0.3512, 0.2297, 0.3227}, - {0.3250, 0.4974, 0.2350}, - {-0.2829, 0.1762, 0.2721}, - {-0.2120, -0.2616, -0.7177}}; - void step_callback(int step, struct ggml_tensor* latents, enum SDVersion version) { const int channel = 3; int width = latents->ne[0]; @@ -853,33 +802,9 @@ void step_callback(int step, struct ggml_tensor* latents, enum SDVersion version return; } uint8_t* data = (uint8_t*)malloc(width * height * channel * sizeof(uint8_t)); - int data_head = 0; - for (int j = 0; j < height; j++) { - for (int i = 0; i < width; i++) { - int latent_id = (i * latents->nb[0] + j * latents->nb[1]); - float r = 0, g = 0, b = 0; - for (int d = 0; d < dim; d++) { - float value = *(float*)((char*)latents->data + latent_id + d * latents->nb[2]); - r += value * latent_rgb_proj[d][0]; - g += value * latent_rgb_proj[d][1]; - b += value * latent_rgb_proj[d][2]; - } - - // change range - r = r * .5 + .5; - g = g * .5 + .5; - b = b * .5 + .5; - - // clamp rgb values to [0,1] range - r = r >= 0 ? r <= 1 ? r : 1 : 0; - g = g >= 0 ? g <= 1 ? g : 1 : 0; - b = b >= 0 ? b <= 1 ? b : 1 : 0; + + preview_latent_image(data, latents, latent_rgb_proj, width, height, dim); - data[data_head++] = (uint8_t)(r * 255.); - data[data_head++] = (uint8_t)(g * 255.); - data[data_head++] = (uint8_t)(b * 255.); - } - } stbi_write_png("latent-preview.png", width, height, channel, data, 0); free(data); } diff --git a/latent-preview.h b/latent-preview.h new file mode 100644 index 000000000..5457c47ed --- /dev/null +++ b/latent-preview.h @@ -0,0 +1,83 @@ + +// https://github.com/comfyanonymous/ComfyUI/blob/master/comfy/latent_formats.py#L152-L169 +const float flux_latent_rgb_proj[16][3] = { + {-0.0346, 0.0244, 0.0681}, + {0.0034, 0.0210, 0.0687}, + {0.0275, -0.0668, -0.0433}, + {-0.0174, 0.0160, 0.0617}, + {0.0859, 0.0721, 0.0329}, + {0.0004, 0.0383, 0.0115}, + {0.0405, 0.0861, 0.0915}, + {-0.0236, -0.0185, -0.0259}, + {-0.0245, 0.0250, 0.1180}, + {0.1008, 0.0755, -0.0421}, + {-0.0515, 0.0201, 0.0011}, + {0.0428, -0.0012, -0.0036}, + {0.0817, 0.0765, 0.0749}, + {-0.1264, -0.0522, -0.1103}, + {-0.0280, -0.0881, -0.0499}, + {-0.1262, -0.0982, -0.0778}}; + +// https://github.com/Stability-AI/sd3.5/blob/main/sd3_impls.py#L228-L246 +const float sd3_latent_rgb_proj[16][3] = { + {-0.0645, 0.0177, 0.1052}, + {0.0028, 0.0312, 0.0650}, + {0.1848, 0.0762, 0.0360}, + {0.0944, 0.0360, 0.0889}, + {0.0897, 0.0506, -0.0364}, + {-0.0020, 0.1203, 0.0284}, + {0.0855, 0.0118, 0.0283}, + {-0.0539, 0.0658, 0.1047}, + {-0.0057, 0.0116, 0.0700}, + {-0.0412, 0.0281, -0.0039}, + {0.1106, 0.1171, 0.1220}, + {-0.0248, 0.0682, -0.0481}, + {0.0815, 0.0846, 0.1207}, + {-0.0120, -0.0055, -0.0867}, + {-0.0749, -0.0634, -0.0456}, + {-0.1418, -0.1457, -0.1259}, +}; + +// https://github.com/comfyanonymous/ComfyUI/blob/master/comfy/latent_formats.py#L32-L38 +const float sdxl_latent_rgb_proj[4][3] = { + {0.3651, 0.4232, 0.4341}, + {-0.2533, -0.0042, 0.1068}, + {0.1076, 0.1111, -0.0362}, + {-0.3165, -0.2492, -0.2188}}; + +// https://github.com/comfyanonymous/ComfyUI/blob/master/comfy/latent_formats.py#L32-L38 +const float sd_latent_rgb_proj[4][3]{ + {0.3512, 0.2297, 0.3227}, + {0.3250, 0.4974, 0.2350}, + {-0.2829, 0.1762, 0.2721}, + {-0.2120, -0.2616, -0.7177}}; + +void preview_latent_image(uint8_t* buffer, struct ggml_tensor* latents, const float (*latent_rgb_proj)[3], int width, int height, int dim) { + size_t buffer_head = 0; + for (int j = 0; j < height; j++) { + for (int i = 0; i < width; i++) { + size_t latent_id = (i * latents->nb[0] + j * latents->nb[1]); + float r = 0, g = 0, b = 0; + for (int d = 0; d < dim; d++) { + float value = *(float*)((char*)latents->data + latent_id + d * latents->nb[2]); + r += value * latent_rgb_proj[d][0]; + g += value * latent_rgb_proj[d][1]; + b += value * latent_rgb_proj[d][2]; + } + + // change range + r = r * .5f + .5f; + g = g * .5f + .5f; + b = b * .5f + .5f; + + // clamp rgb values to [0,1] range + r = r >= 0 ? r <= 1 ? r : 1 : 0; + g = g >= 0 ? g <= 1 ? g : 1 : 0; + b = b >= 0 ? b <= 1 ? b : 1 : 0; + + buffer[buffer_head++] = (uint8_t)(r * 255); + buffer[buffer_head++] = (uint8_t)(g * 255); + buffer[buffer_head++] = (uint8_t)(b * 255); + } + } +} \ No newline at end of file From 4174f301a5e15b4b7729ba9fb794259c905a2007 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20du=20Hamel?= Date: Tue, 26 Nov 2024 11:43:26 +0100 Subject: [PATCH 5/7] No defaults in c code --- stable-diffusion.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stable-diffusion.h b/stable-diffusion.h index 262dc80f5..74cc3076b 100644 --- a/stable-diffusion.h +++ b/stable-diffusion.h @@ -173,7 +173,7 @@ SD_API sd_image_t* txt2img(sd_ctx_t* sd_ctx, float slg_scale, float skip_layer_start, float skip_layer_end, - step_callback_t step_callback = NULL); + step_callback_t step_callback); SD_API sd_image_t* img2img(sd_ctx_t* sd_ctx, sd_image_t init_image, From 68373871a376cd08ac7ba3e2eb5771ab3900674d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20du=20Hamel?= Date: Tue, 26 Nov 2024 11:56:57 +0100 Subject: [PATCH 6/7] Latent preview support for img2img and img2vid --- examples/cli/main.cpp | 6 ++++-- stable-diffusion.cpp | 32 +++++++++++++++++++------------- stable-diffusion.h | 6 ++++-- 3 files changed, 27 insertions(+), 17 deletions(-) diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp index 5e1e747d1..d79928170 100644 --- a/examples/cli/main.cpp +++ b/examples/cli/main.cpp @@ -996,7 +996,8 @@ int main(int argc, const char* argv[]) { params.sample_method, params.sample_steps, params.strength, - params.seed); + params.seed, + (step_callback_t)step_callback); if (results == NULL) { printf("generate failed\n"); free_sd_ctx(sd_ctx); @@ -1042,7 +1043,8 @@ int main(int argc, const char* argv[]) { params.skip_layers.size(), params.slg_scale, params.skip_layer_start, - params.skip_layer_end); + params.skip_layer_end, + (step_callback_t)step_callback); } } diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index f44db931f..8eedd9988 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -781,10 +781,10 @@ class StableDiffusionGGML { const std::vector& sigmas, int start_merge_step, SDCondition id_cond, - std::vector skip_layers = {}, - float slg_scale = 0, - float skip_layer_start = 0.01, - float skip_layer_end = 0.2, + std::vector skip_layers = {}, + float slg_scale = 0, + float skip_layer_start = 0.01, + float skip_layer_end = 0.2, std::function step_callback = nullptr) { size_t steps = sigmas.size() - 1; // noise = load_tensor_from_file(work_ctx, "./rand0.bin"); @@ -1167,10 +1167,10 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, float style_ratio, bool normalize_input, std::string input_id_images_path, - std::vector skip_layers = {}, - float slg_scale = 0, - float skip_layer_start = 0.01, - float skip_layer_end = 0.2, + std::vector skip_layers = {}, + float slg_scale = 0, + float skip_layer_start = 0.01, + float skip_layer_end = 0.2, std::function step_callback = nullptr) { if (seed < 0) { // Generally, when using the provided command line, the seed is always >0. @@ -1466,7 +1466,7 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx, float slg_scale = 0, float skip_layer_start = 0.01, float skip_layer_end = 0.2, - step_callback_t step_callback) { + step_callback_t step_callback = NULL) { std::vector skip_layers_vec(skip_layers, skip_layers + skip_layers_count); LOG_DEBUG("txt2img %dx%d", width, height); if (sd_ctx == NULL) { @@ -1572,7 +1572,8 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx, size_t skip_layers_count = 0, float slg_scale = 0, float skip_layer_start = 0.01, - float skip_layer_end = 0.2) { + float skip_layer_end = 0.2, + step_callback_t step_callback = NULL) { std::vector skip_layers_vec(skip_layers, skip_layers + skip_layers_count); LOG_DEBUG("img2img %dx%d", width, height); if (sd_ctx == NULL) { @@ -1651,7 +1652,8 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx, skip_layers_vec, slg_scale, skip_layer_start, - skip_layer_end); + skip_layer_end, + step_callback); size_t t2 = ggml_time_ms(); @@ -1673,7 +1675,8 @@ SD_API sd_image_t* img2vid(sd_ctx_t* sd_ctx, enum sample_method_t sample_method, int sample_steps, float strength, - int64_t seed) { + int64_t seed, + step_callback_t step_callback = NULL) { if (sd_ctx == NULL) { return NULL; } @@ -1752,7 +1755,10 @@ SD_API sd_image_t* img2vid(sd_ctx_t* sd_ctx, sample_method, sigmas, -1, - SDCondition(NULL, NULL, NULL)); + SDCondition(NULL, NULL, NULL), + {}, + 0, 0, 0, + step_callback); int64_t t2 = ggml_time_ms(); LOG_INFO("sampling completed, taking %.2fs", (t2 - t1) * 1.0f / 1000); diff --git a/stable-diffusion.h b/stable-diffusion.h index 74cc3076b..ad36f7f6b 100644 --- a/stable-diffusion.h +++ b/stable-diffusion.h @@ -198,7 +198,8 @@ SD_API sd_image_t* img2img(sd_ctx_t* sd_ctx, size_t skip_layers_count, float slg_scale, float skip_layer_start, - float skip_layer_end); + float skip_layer_end, + step_callback_t step_callback); SD_API sd_image_t* img2vid(sd_ctx_t* sd_ctx, sd_image_t init_image, @@ -213,7 +214,8 @@ SD_API sd_image_t* img2vid(sd_ctx_t* sd_ctx, enum sample_method_t sample_method, int sample_steps, float strength, - int64_t seed); + int64_t seed, + step_callback_t step_callback); typedef struct upscaler_ctx_t upscaler_ctx_t; From 6fdc230f9fe610deac8276e657d10740a4a9b555 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20du=20Hamel?= Date: Tue, 26 Nov 2024 12:06:53 +0100 Subject: [PATCH 7/7] add latent-preview to .gitignore --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 38fe570df..2e520df2c 100644 --- a/.gitignore +++ b/.gitignore @@ -10,4 +10,5 @@ test/ *.gguf output*.png models* -*.log \ No newline at end of file +*.log +latent-preview.png