Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -326,11 +326,12 @@ arguments:
--slg-scale SCALE skip layer guidance (SLG) scale, only for DiT models: (default: 0)
0 means disabled, a value of 2.5 is nice for sd3.5 medium
--eta SCALE eta in DDIM, only for DDIM and TCD: (default: 0)
--ge-gamma SCALE gamma for gradient estimation sampler: (default: 2.0)
--skip-layers LAYERS Layers to skip for SLG steps: (default: [7,8,9])
--skip-layer-start START SLG enabling point: (default: 0.01)
--skip-layer-end END SLG disabling point: (default: 0.2)
--scheduler {discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple} Denoiser sigma scheduler (default: discrete)
--sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd}
--sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, gradient_estimation}
sampling method (default: "euler" for Flux/SD3/Wan, "euler_a" otherwise)
--timestep-shift N shift timestep for NitroFusion models, default: 0, recommended N for NitroSD-Realism around 250 and 500 for NitroSD-Vibrant
--steps STEPS number of sample steps (default: 20)
Expand All @@ -344,7 +345,7 @@ arguments:
--high-noise-skip-layer-start (high noise) SLG enabling point: (default: 0.01)
--high-noise-skip-layer-end END (high noise) SLG disabling point: (default: 0.2)
--high-noise-scheduler {discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple} Denoiser sigma scheduler (default: discrete)
--high-noise-sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd}
--high-noise-sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, gradient_estimation}
(high noise) sampling method (default: "euler_a")
--high-noise-steps STEPS (high noise) number of sample steps (default: -1 = auto)
SLG will be enabled at step int([STEPS]*[START]) and disabled at int([STEPS]*[END])
Expand Down
48 changes: 47 additions & 1 deletion denoiser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -576,7 +576,8 @@ static void sample_k_diffusion(sample_method_t method,
ggml_tensor* x,
std::vector<float> sigmas,
std::shared_ptr<RNG> rng,
float eta) {
float eta,
float ge_gamma) {
size_t steps = sigmas.size() - 1;
// sample_euler_ancestral
switch (method) {
Expand Down Expand Up @@ -1462,7 +1463,52 @@ static void sample_k_diffusion(sample_method_t method,
}
}
} break;
case GRADIENT_ESTIMATION: {
    // Gradient-estimation sampler.
    //
    // Each step performs an Euler update along d = (x - denoised) / sigma and then,
    // once a derivative from a previous step is available, adds a correction
    // (ge_gamma - 1) * (d - old_d) * dt. With ge_gamma == 1 the correction is zero
    // and the update reduces to plain Euler.
    //
    // On the final step (next sigma == 0) x is set to the model's denoised output
    // and NO correction is applied, so the returned latent is exactly `denoised`.
    struct ggml_tensor* d     = ggml_dup_tensor(work_ctx, x);
    struct ggml_tensor* old_d = ggml_dup_tensor(work_ctx, x);
    bool has_old_d            = false;

    // d and old_d are created from x via ggml_dup_tensor and are indexed in lockstep
    // with x below, so a single (loop-invariant) element count is used for all three.
    // ggml_nelements returns int64_t; keep the index type wide enough for it.
    const int64_t n_elements = ggml_nelements(x);
    const float ge_scale     = ge_gamma - 1.f;

    // Cast once so the comparison is signed/signed; an empty `sigmas` (steps wrapped
    // around) then simply skips the loop instead of iterating out of bounds.
    const int n_steps = (int)steps;

    for (int i = 0; i < n_steps; i++) {
        const float sigma      = sigmas[i];
        const float sigma_next = sigmas[i + 1];

        ggml_tensor* denoised = model(x, sigma, i + 1);

        float* vec_d        = (float*)d->data;
        float* vec_old_d    = (float*)old_d->data;
        float* vec_x        = (float*)x->data;
        float* vec_denoised = (float*)denoised->data;

        // d = (x - denoised) / sigma
        for (int64_t j = 0; j < n_elements; j++) {
            vec_d[j] = (vec_x[j] - vec_denoised[j]) / sigma;
        }

        const float dt = sigma_next - sigma;

        if (sigma_next == 0) {
            // Denoising step: take the model prediction as-is.
            for (int64_t j = 0; j < n_elements; j++) {
                vec_x[j] = vec_denoised[j];
            }
        } else {
            // Euler method
            for (int64_t j = 0; j < n_elements; j++) {
                vec_x[j] = vec_x[j] + vec_d[j] * dt;
            }

            if (has_old_d) {
                // Gradient estimation: only meaningful on top of an Euler step,
                // and only once a previous derivative exists (i.e. from step 2 on).
                for (int64_t j = 0; j < n_elements; j++) {
                    float d_bar = ge_scale * (vec_d[j] - vec_old_d[j]);
                    vec_x[j]    = vec_x[j] + d_bar * dt;
                }
            }
        }

        // old_d = d
        copy_ggml_tensor(old_d, d);
        has_old_d = true;
    }
} break;
default:
LOG_ERROR("Attempting to sample with nonexisting sample method %i", method);
abort();
Expand Down
6 changes: 4 additions & 2 deletions examples/cli/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,11 +248,12 @@ void print_usage(int argc, const char* argv[]) {
printf(" --slg-scale SCALE skip layer guidance (SLG) scale, only for DiT models: (default: 0)\n");
printf(" 0 means disabled, a value of 2.5 is nice for sd3.5 medium\n");
printf(" --eta SCALE eta in DDIM, only for DDIM and TCD: (default: 0)\n");
printf(" --ge-gamma SCALE gamma for gradient estimation sampler: (default: 2.0)\n");
printf(" --skip-layers LAYERS Layers to skip for SLG steps: (default: [7,8,9])\n");
printf(" --skip-layer-start START SLG enabling point: (default: 0.01)\n");
printf(" --skip-layer-end END SLG disabling point: (default: 0.2)\n");
printf(" --scheduler {discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple} Denoiser sigma scheduler (default: discrete)\n");
printf(" --sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd}\n");
printf(" --sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, gradient_estimation}\n");
printf(" sampling method (default: \"euler\" for Flux/SD3/Wan, \"euler_a\" otherwise)\n");
printf(" --timestep-shift N shift timestep for NitroFusion models, default: 0, recommended N for NitroSD-Realism around 250 and 500 for NitroSD-Vibrant\n");
printf(" --steps STEPS number of sample steps (default: 20)\n");
Expand All @@ -266,7 +267,7 @@ void print_usage(int argc, const char* argv[]) {
printf(" --high-noise-skip-layer-start (high noise) SLG enabling point: (default: 0.01)\n");
printf(" --high-noise-skip-layer-end END (high noise) SLG disabling point: (default: 0.2)\n");
printf(" --high-noise-scheduler {discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple} Denoiser sigma scheduler (default: discrete)\n");
printf(" --high-noise-sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd}\n");
printf(" --high-noise-sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, gradient_estimation}\n");
printf(" (high noise) sampling method (default: \"euler_a\")\n");
printf(" --high-noise-steps STEPS (high noise) number of sample steps (default: -1 = auto)\n");
printf(" SLG will be enabled at step int([STEPS]*[START]) and disabled at int([STEPS]*[END])\n");
Expand Down Expand Up @@ -535,6 +536,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
{"", "--skip-layer-start", "", &params.sample_params.guidance.slg.layer_start},
{"", "--skip-layer-end", "", &params.sample_params.guidance.slg.layer_end},
{"", "--eta", "", &params.sample_params.eta},
{"", "--ge-gamma", "", &params.sample_params.ge_gamma},
{"", "--high-noise-cfg-scale", "", &params.high_noise_sample_params.guidance.txt_cfg},
{"", "--high-noise-img-cfg-scale", "", &params.high_noise_sample_params.guidance.img_cfg},
{"", "--high-noise-guidance", "", &params.high_noise_sample_params.guidance.distilled_guidance},
Expand Down
13 changes: 12 additions & 1 deletion stable-diffusion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ const char* sampling_methods_str[] = {
"DDIM \"trailing\"",
"TCD",
"Euler A",
"Gradient Estimation",
};

/*================================================== Helper Functions ================================================*/
Expand Down Expand Up @@ -1071,6 +1072,7 @@ class StableDiffusionGGML {
float eta,
int shifted_timestep,
sample_method_t method,
float ge_gamma,
const std::vector<float>& sigmas,
int start_merge_step,
SDCondition id_cond,
Expand Down Expand Up @@ -1299,7 +1301,7 @@ class StableDiffusionGGML {
return denoised;
};

sample_k_diffusion(method, denoise, work_ctx, x, sigmas, rng, eta);
sample_k_diffusion(method, denoise, work_ctx, x, sigmas, rng, eta, ge_gamma);

if (inverse_noise_scaling) {
x = denoiser->inverse_noise_scaling(sigmas[sigmas.size() - 1], x);
Expand Down Expand Up @@ -1670,6 +1672,7 @@ const char* sample_method_to_str[] = {
"ddim_trailing",
"tcd",
"euler_a",
"gradient_estimation",
};

const char* sd_sample_method_name(enum sample_method_t sample_method) {
Expand Down Expand Up @@ -1812,6 +1815,7 @@ void sd_sample_params_init(sd_sample_params_t* sample_params) {
sample_params->scheduler = DEFAULT;
sample_params->sample_method = SAMPLE_METHOD_DEFAULT;
sample_params->sample_steps = 20;
sample_params->ge_gamma = 2.0f;
}

char* sd_sample_params_to_str(const sd_sample_params_t* sample_params) {
Expand All @@ -1832,6 +1836,7 @@ char* sd_sample_params_to_str(const sd_sample_params_t* sample_params) {
"sample_method: %s, "
"sample_steps: %d, "
"eta: %.2f, "
"ge_gamma: %.2f, "
"shifted_timestep: %d)",
sample_params->guidance.txt_cfg,
isfinite(sample_params->guidance.img_cfg)
Expand All @@ -1846,6 +1851,7 @@ char* sd_sample_params_to_str(const sd_sample_params_t* sample_params) {
sd_sample_method_name(sample_params->sample_method),
sample_params->sample_steps,
sample_params->eta,
sample_params->ge_gamma,
sample_params->shifted_timestep);

return buf;
Expand Down Expand Up @@ -1979,6 +1985,7 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx,
int width,
int height,
enum sample_method_t sample_method,
float ge_gamma,
const std::vector<float>& sigmas,
int64_t seed,
int batch_count,
Expand Down Expand Up @@ -2266,6 +2273,7 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx,
eta,
shifted_timestep,
sample_method,
ge_gamma,
sigmas,
start_merge_step,
id_cond,
Expand Down Expand Up @@ -2570,6 +2578,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g
width,
height,
sample_method,
sd_img_gen_params->sample_params.ge_gamma,
sigmas,
seed,
sd_img_gen_params->batch_count,
Expand Down Expand Up @@ -2902,6 +2911,7 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
sd_vid_gen_params->high_noise_sample_params.eta,
sd_vid_gen_params->high_noise_sample_params.shifted_timestep,
sd_vid_gen_params->high_noise_sample_params.sample_method,
sd_vid_gen_params->high_noise_sample_params.ge_gamma,
high_noise_sigmas,
-1,
{},
Expand Down Expand Up @@ -2938,6 +2948,7 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
sd_vid_gen_params->sample_params.eta,
sd_vid_gen_params->sample_params.shifted_timestep,
sd_vid_gen_params->sample_params.sample_method,
sd_vid_gen_params->sample_params.ge_gamma,
sigmas,
-1,
{},
Expand Down
2 changes: 2 additions & 0 deletions stable-diffusion.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ enum sample_method_t {
DDIM_TRAILING,
TCD,
EULER_A,
GRADIENT_ESTIMATION,
SAMPLE_METHOD_COUNT
};

Expand Down Expand Up @@ -186,6 +187,7 @@ typedef struct {
enum sample_method_t sample_method;
int sample_steps;
float eta;
float ge_gamma;
int shifted_timestep;
} sd_sample_params_t;

Expand Down
Loading