6 changes: 6 additions & 0 deletions CMakeLists.txt
@@ -32,6 +32,7 @@ option(SD_SYCL "sd: sycl backend" OFF)
option(SD_MUSA "sd: musa backend" OFF)
option(SD_FAST_SOFTMAX "sd: x1.5 faster softmax, indeterministic (sometimes, same seed don't generate same image), cuda only" OFF)
option(SD_BUILD_SHARED_LIBS "sd: build shared libs" OFF)
option(SD_SHOW_REMAINING_TIME "sd: show remaining and average sampling time" OFF)
#option(SD_BUILD_SERVER "sd: build server example" ON)

if(SD_CUDA)
@@ -93,6 +94,11 @@ else()
add_library(${SD_LIB} STATIC ${SD_LIB_SOURCES})
endif()

if (SD_SHOW_REMAINING_TIME)
message("-- Display remaining and average sampling time")
add_definitions(-DSD_SHOW_REMAINING_TIME)
endif ()

if(SD_SYCL)
message("-- Use SYCL as backend stable-diffusion")
set(GGML_SYCL ON)
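Note that SD_SHOW_REMAINING_TIME is consumed as a plain compile definition via add_definitions, so the feature is chosen at configure time (for example with cmake -B build -DSD_SHOW_REMAINING_TIME=ON) and every translation unit sees the same setting. A minimal sketch of that compile-time gate, not from the PR, assuming nothing beyond the macro name introduced above:

#include <cstdio>

int main() {
#ifdef SD_SHOW_REMAINING_TIME
    // built with the option ON: progress will include average time and an estimate of time left
    std::printf("remaining/average sampling time display enabled\n");
#else
    // default build: progress shows only the duration of the last step
    std::printf("per-step timing only\n");
#endif
    return 0;
}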
28 changes: 28 additions & 0 deletions denoiser.hpp
@@ -468,6 +468,20 @@ struct FluxFlowDenoiser : public Denoiser {

typedef std::function<ggml_tensor*(ggml_tensor*, float, int)> denoise_cb_t;

// Report progress after completing step i0 (0-based) of im total sampling steps.
// With SD_SHOW_REMAINING_TIME, *t0 is the timestamp of the start of sampling and is
// never updated; otherwise it holds the timestamp of the previous step and is reset here.
static inline void show_step(int i0, int im, int64_t* t0) {
#ifdef SD_SHOW_REMAINING_TIME
int i = i0 + 1;
// running average of seconds per completed step since sampling started
float t1 = (ggml_time_us() - *t0) / 1000000.f / i;
// estimated time left = average seconds per step * steps remaining
pretty_progress(i, im, t1, t1 * (im - i));
#else // SD_SHOW_REMAINING_TIME
int64_t t1 = ggml_time_us();
// duration of this step only
pretty_progress(i0 + 1, im, (t1 - *t0) / 1000000.f);
// LOG_INFO("step %d sampling completed taking %.2fs", i0 + 1, (t1 - *t0) * 1.0f / 1000000);
*t0 = t1;
#endif // SD_SHOW_REMAINING_TIME
}

// k diffusion reverse ODE: dx = (x - D(x;\sigma)) / \sigma dt; \sigma(t) = t
static void sample_k_diffusion(sample_method_t method,
denoise_cb_t model,
@@ -477,6 +491,8 @@ static void sample_k_diffusion(sample_method_t method,
std::shared_ptr<RNG> rng,
float eta) {
size_t steps = sigmas.size() - 1;
int64_t t0 = ggml_time_us();

// sample_euler_ancestral
switch (method) {
case EULER_A: {
@@ -530,6 +546,7 @@
}
}
}
show_step(i, steps, &t0);
}
} break;
case EULER: // Implemented without any sigma churn
@@ -563,6 +580,7 @@ static void sample_k_diffusion(sample_method_t method,
vec_x[j] = vec_x[j] + vec_d[j] * dt;
}
}
show_step(i, steps, &t0);
}
} break;
case HEUN: {
@@ -613,6 +631,7 @@ static void sample_k_diffusion(sample_method_t method,
vec_x[j] = vec_x[j] + vec_d[j] * dt;
}
}
show_step(i, steps, &t0);
}
} break;
case DPM2: {
@@ -664,6 +683,7 @@ static void sample_k_diffusion(sample_method_t method,
vec_x[j] = vec_x[j] + d2 * dt_2;
}
}
show_step(i, steps, &t0);
}

} break;
@@ -738,6 +758,7 @@ static void sample_k_diffusion(sample_method_t method,
}
}
}
show_step(i, steps, &t0);
}
} break;
case DPMPP2M: // DPM++ (2M) from Karras et al (2022)
@@ -777,6 +798,7 @@ static void sample_k_diffusion(sample_method_t method,
for (int j = 0; j < ggml_nelements(x); j++) {
vec_old_denoised[j] = vec_denoised[j];
}
show_step(i, steps, &t0);
}
} break;
case DPMPP2Mv2: // Modified DPM++ (2M) from https://github.com/AUTOMATIC1111/stable-diffusion-webui/discussions/8457
@@ -820,6 +842,7 @@ static void sample_k_diffusion(sample_method_t method,
for (int j = 0; j < ggml_nelements(x); j++) {
vec_old_denoised[j] = vec_denoised[j];
}
show_step(i, steps, &t0);
}
} break;
case IPNDM: // iPNDM sampler from https://github.com/zju-pi/diff-sampler/tree/main/diff-solvers-main
@@ -895,6 +918,7 @@ static void sample_k_diffusion(sample_method_t method,
} else {
buffer_model.push_back(d_cur);
}
show_step(i, steps, &t0);
}
} break;
case IPNDM_V: // iPNDM_v sampler from https://github.com/zju-pi/diff-sampler/tree/main/diff-solvers-main
@@ -969,6 +993,7 @@ static void sample_k_diffusion(sample_method_t method,

// Prepare the next d tensor
d_cur = ggml_dup_tensor(work_ctx, x_next);
show_step(i, steps, &t0);
}
} break;
case LCM: // Latent Consistency Models
@@ -1004,6 +1029,7 @@ static void sample_k_diffusion(sample_method_t method,
}
}
}
show_step(i, steps, &t0);
}
} break;
case DDIM_TRAILING: // Denoising Diffusion Implicit Models
@@ -1198,6 +1224,7 @@ static void sample_k_diffusion(sample_method_t method,
// needs to be prescaled again, since k-diffusion's
// model() differes from the bare U-net F_theta by the
// factor c_in.
show_step(i, steps, &t0);
}
} break;
case TCD: // Strategic Stochastic Sampling (Algorithm 4) in
@@ -1372,6 +1399,7 @@ static void sample_k_diffusion(sample_method_t method,
vec_noise[j];
}
}
show_step(i, steps, &t0);
}
} break;

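Taken together, these hunks move per-step timing into show_step, which every sampler loop now calls once per step. With SD_SHOW_REMAINING_TIME defined, t0 stays fixed at the start of sampling, so elapsed time divided by completed steps gives a running average and that average times the remaining steps gives the estimate passed to pretty_progress; in the default build t0 is reset on each call and only the last step's duration is shown. A standalone sketch of the same arithmetic, not from the PR, using std::chrono in place of ggml_time_us() and a hypothetical report_eta helper:

#include <chrono>
#include <cstdio>
#include <thread>

// Running-average ETA: elapsed / completed is the average seconds per step,
// and average * (total - completed) is the estimated time remaining.
static void report_eta(std::chrono::steady_clock::time_point start,
                       int completed, int total) {
    float elapsed = std::chrono::duration<float>(
                        std::chrono::steady_clock::now() - start).count();
    float avg  = elapsed / completed;        // seconds per step so far
    float left = avg * (total - completed);  // estimated seconds remaining
    std::printf("step %d/%d - %.2fs/it, ~%.2fs left\n", completed, total, avg, left);
}

int main() {
    const int total = 4;
    auto start = std::chrono::steady_clock::now();
    for (int i = 1; i <= total; i++) {
        // stand-in for one denoising step
        std::this_thread::sleep_for(std::chrono::milliseconds(100));
        report_eta(start, i, total);
    }
    return 0;
}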
6 changes: 0 additions & 6 deletions stable-diffusion.cpp
@@ -851,7 +851,6 @@ class StableDiffusionGGML {
if (step == 1) {
pretty_progress(0, (int)steps, 0);
}
int64_t t0 = ggml_time_us();

std::vector<float> scaling = denoiser->get_scalings(sigma);
GGML_ASSERT(scaling.size() == 3);
@@ -970,11 +969,6 @@ class StableDiffusionGGML {
// denoised = (v * c_out + input * c_skip) or (input + eps * c_out)
vec_denoised[i] = latent_result * c_out + vec_input[i] * c_skip;
}
int64_t t1 = ggml_time_us();
if (step > 0) {
pretty_progress(step, (int)steps, (t1 - t0) / 1000000.f);
// LOG_INFO("step %d sampling completed taking %.2fs", step, (t1 - t0) * 1.0f / 1000000);
}
if (noise_mask != nullptr) {
for (int64_t x = 0; x < denoised->ne[0]; x++) {
for (int64_t y = 0; y < denoised->ne[1]; y++) {
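The stable-diffusion.cpp side only loses code: the t0/t1 bookkeeping and the per-step pretty_progress call are dropped from the denoise callback because equivalent reporting now happens in show_step inside the sampler loops above, so the callback no longer needs to time itself.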
45 changes: 44 additions & 1 deletion util.cpp
@@ -370,6 +370,49 @@ void pretty_progress(int step, int steps, float time) {
}
}

#ifdef SD_SHOW_REMAINING_TIME
void pretty_progress(int step, int steps, float time, float left) {
if (sd_progress_cb) {
sd_progress_cb(step, steps, time, sd_progress_cb_data);
return;
}
if (step == 0) {
return;
}
std::string progress = " |";
int max_progress = 50;
int32_t current = (int32_t)(step * 1.f * max_progress / steps);
for (int i = 0; i < 50; i++) {
if (i > current) {
progress += " ";
} else if (i == current && i != max_progress - 1) {
progress += ">";
} else {
progress += "=";
}
}
progress += "|";
printf(time > 1.0f ? "\r%s %i/%i - %.2fs/it" : "\r%s %i/%i - %.2fit/s\033[K",
progress.c_str(), step, steps,
time > 1.0f || time == 0 ? time : (1.0f / time));
if (left >= 60.0f) {
/* same number of spaces and backspaces */
printf(", %.0fm %.2fs left \b\b\b\b\b\b\b\b\b",
/* min appears faster than mul+div for me, 19.31s vs 19.34s average */
floor(left / 60.0f), std::min(59.99f, fmod(left, 60.0f)));
//floor(left / 60.0f), floor(fmod(left, 60.0f) * 100.0f) / 100.0f);
} else if (left > 0) {
printf(", %.2fs left \b\b\b\b\b\b\b\b\b\b\b\b\b\b\b", left);
} else {
printf(" \b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b");
}
fflush(stdout); // for linux
if (step == steps) {
printf("\n");
}
}
#endif // SD_SHOW_REMAINING_TIME

std::string ltrim(const std::string& s) {
auto it = std::find_if(s.begin(), s.end(), [](int ch) {
return !std::isspace(ch);
@@ -699,4 +742,4 @@ std::vector<std::pair<std::string, float>> parse_prompt_attention(const std::str
}

return res;
}
}
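The new overload keeps the original progress bar and s/it / it/s readout and appends the estimate: whole minutes via floor(left / 60), seconds via fmod(left, 60) clamped to 59.99 so the %.2f output never rounds up to "60.00" (the commented-out truncation achieves the same thing), plus trailing spaces and backspaces to overwrite whatever a previous, longer line left behind. A small sketch of just the suffix formatting, not from the PR; format_left is a hypothetical name:

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <string>

// Sketch: build the "time left" suffix the way the new overload formats it
// (whole minutes plus seconds, with the seconds kept below 60.00 after rounding).
static std::string format_left(float left) {
    char buf[64];
    if (left >= 60.0f) {
        std::snprintf(buf, sizeof(buf), ", %.0fm %.2fs left",
                      std::floor(left / 60.0f), std::min(59.99f, std::fmod(left, 60.0f)));
    } else if (left > 0.0f) {
        std::snprintf(buf, sizeof(buf), ", %.2fs left", left);
    } else {
        buf[0] = '\0';  // nothing to append (the real code prints spaces and backspaces to erase an old suffix)
    }
    return buf;
}

int main() {
    std::printf("%s\n", format_left(125.7f).c_str());  // ", 2m 5.70s left"
    std::printf("%s\n", format_left(42.0f).c_str());   // ", 42.00s left"
    return 0;
}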
3 changes: 3 additions & 0 deletions util.h
@@ -47,6 +47,9 @@ sd_image_f32_t clip_preprocess(sd_image_f32_t image, int size);
std::string path_join(const std::string& p1, const std::string& p2);
std::vector<std::string> splitString(const std::string& str, char delimiter);
void pretty_progress(int step, int steps, float time);
#ifdef SD_SHOW_REMAINING_TIME
void pretty_progress(int step, int steps, float time, float left);
#endif // SD_SHOW_REMAINING_TIME

void log_printf(sd_log_level_t level, const char* file, int line, const char* format, ...);
